# Time Series Analysis

### Loading Libraries

In [3]:
# ZipFiles & IO
import io
import zipfile

#URL
import urllib.request

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd
import polars as pl
import polars.selectors as cs

# Data Visualization
import hvplot
import seaborn as sns
import altair as alt
import matplotlib.pyplot as plt

# XGBoost
import xgboost as xgb

# Scikit-Learn
from sklearn import decomposition

# Date & Time
from datetime import datetime
from datetime import timedelta

In [4]:
# Activate hvplot's 'matplotlib' plotting extension for the rest of the notebook.
hvplot.extension('matplotlib')

### Loading Dataset

#### URL Function

In [21]:
def download_and_modify_url(url, local_filename, *, header_rows=34,
                            extra_skip_rows=(35,), encoding='utf-8'):
    """Download *url* to *local_filename*, then drop unwanted rows in place.

    The downloaded text file is read back, filtered by 0-based line index,
    and rewritten to the same path.

    With the defaults, lines 0-33 and line 35 are removed, so line 34
    becomes the first line of the saved file and line 36 the second.
    (Presumably line 34 is the column-header row and line 35 a
    format-descriptor row of the source file -- confirm against the data.)

    Args:
        url: Location of the file to fetch; passed to
            ``urllib.request.urlretrieve``.
        local_filename: Path the file is saved to and rewritten at.
        header_rows: Number of leading lines to discard.
        extra_skip_rows: Additional 0-based line indices to discard.
        encoding: Text encoding used for both reading and writing, so the
            result does not depend on the platform's locale default.

    Returns:
        None. The file at *local_filename* is modified as a side effect.
    """
    urllib.request.urlretrieve(url, local_filename)

    with open(local_filename, 'r', encoding=encoding) as file:
        lines = file.readlines()

    # Set membership keeps the per-line test cheap for any number of indices.
    skipped = frozenset(extra_skip_rows)
    kept = [line for i, line in enumerate(lines)
            if i >= header_rows and i not in skipped]

    with open(local_filename, 'w', encoding=encoding) as file:
        file.writelines(kept)

In [22]:
# Source data: dirtydevil.txt from the mattharrison/datasets GitHub repository.
url = 'https://github.com/mattharrison/datasets/raw/master/data/dirtydevil.txt'

In [23]:
# Where the downloaded copy is stored.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
local_filename = '/Users/isisromero/desktop/polars/datasets/devilclean.txt'

In [24]:
# Fetch the file and rewrite it in place without the skipped leading rows.
download_and_modify_url(url, local_filename)

In [25]:
# Report the path the file was written to.
print(f"Saved & Modified File within {local_filename}")

Saved & Modified File within /Users/isisromero/desktop/polars/datasets/devilclean.txt


#### Tweaking Function

In [27]:
def tweak_river(df_):
    """Return a tidied view of the raw river readings.

    The result keeps the three identifier columns unchanged, parses the
    'datetime' string column into a datetime, exposes column
    '144166_00060' under the name 'cfs', and exposes column
    '144167_00065' cast to Float64 under the name 'gage_height'.
    """
    id_columns = ('agency_cd', 'site_no', 'tz_cd')
    timestamp = pl.col('datetime').str.to_datetime()
    flow = pl.col('144166_00060')
    height = pl.col('144167_00065').cast(pl.Float64)

    return df_.select(*id_columns, timestamp, cfs=flow, gage_height=height)

In [29]:
# Load the cleaned, tab-separated file into a Polars DataFrame.
# NOTE(review): this repeats the literal path also stored in `local_filename`.
raw = pl.read_csv('/Users/isisromero/desktop/polars/datasets/devilclean.txt', separator='\t')

In [30]:
# Apply the column selection, renaming and type conversions from tweak_river.
dd = tweak_river(raw)

In [31]:
# Display the tidied frame.
print(dd)

shape: (539_305, 6)
┌───────────┬─────────┬───────┬─────────────────────┬──────┬─────────────┐
│ agency_cd ┆ site_no ┆ tz_cd ┆ datetime            ┆ cfs  ┆ gage_height │
│ ---       ┆ ---     ┆ ---   ┆ ---                 ┆ ---  ┆ ---         │
│ str       ┆ i64     ┆ str   ┆ datetime[μs]        ┆ f64  ┆ f64         │
╞═══════════╪═════════╪═══════╪═════════════════════╪══════╪═════════════╡
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 01:00:00 ┆ 71.0 ┆ null        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 01:15:00 ┆ 71.0 ┆ null        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 01:30:00 ┆ 71.0 ┆ null        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 01:45:00 ┆ 70.0 ┆ null        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 02:00:00 ┆ 70.0 ┆ null        │
│ …         ┆ …       ┆ …     ┆ …                   ┆ …    ┆ …           │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2020-09-28 08:30:00 ┆ 9.53 ┆ 6.16        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2020-09-28 08:45:00 ┆ 9.2  ┆ 6.15        │
│ USG

### Dates Conversion

In [33]:
# Re-read the file, this time letting Polars try to parse date-like columns
# itself (try_parse_dates=True) instead of converting them afterwards.
print(pl.read_csv('/Users/isisromero/desktop/polars/datasets/devilclean.txt', separator='\t', try_parse_dates=True))

shape: (539_305, 8)
┌───────────┬─────────┬─────────────┬───────┬─────────────┬─────────────┬─────────────┬────────────┐
│ agency_cd ┆ site_no ┆ datetime    ┆ tz_cd ┆ 144166_0006 ┆ 144166_0006 ┆ 144167_0006 ┆ 144167_000 │
│ ---       ┆ ---     ┆ ---         ┆ ---   ┆ 0           ┆ 0_cd        ┆ 5           ┆ 65_cd      │
│ str       ┆ i64     ┆ datetime[μs ┆ str   ┆ ---         ┆ ---         ┆ ---         ┆ ---        │
│           ┆         ┆ ]           ┆       ┆ f64         ┆ str         ┆ str         ┆ str        │
╞═══════════╪═════════╪═════════════╪═══════╪═════════════╪═════════════╪═════════════╪════════════╡
│ USGS      ┆ 9333500 ┆ 2001-05-07  ┆ MDT   ┆ 71.0        ┆ A:[91]      ┆ null        ┆ null       │
│           ┆         ┆ 01:00:00    ┆       ┆             ┆             ┆             ┆            │
│ USGS      ┆ 9333500 ┆ 2001-05-07  ┆ MDT   ┆ 71.0        ┆ A:[91]      ┆ null        ┆ null       │
│           ┆         ┆ 01:15:00    ┆       ┆             ┆            

In [34]:
# Explicit strptime-style pattern for the raw 'datetime' strings
# (they look like '2001-05-07 01:00').
# NOTE(review): the name `format` shadows Python's builtin `format` for the
# rest of the notebook; later cells reference it, so it is left unchanged here.
format = '%Y-%m-%d %H:%M'

In [37]:
# Compare the string-to-temporal conversions side by side, each given the
# same explicit format string.
print(raw
      .select(original=pl.col('datetime'),
              to_datetime=pl.col('datetime').str.to_datetime(format),
              to_date=pl.col('datetime').str.to_date(format),
              strptime=pl.col('datetime').str.strptime(pl.Datetime, format),
              # A plain cast from the string column is kept disabled: the line below fails.
              #cast=pl.col('datetime').cast(pl.Datetime)
             )
     )

shape: (539_305, 4)
┌──────────────────┬─────────────────────┬────────────┬─────────────────────┐
│ original         ┆ to_datetime         ┆ to_date    ┆ strptime            │
│ ---              ┆ ---                 ┆ ---        ┆ ---                 │
│ str              ┆ datetime[μs]        ┆ date       ┆ datetime[μs]        │
╞══════════════════╪═════════════════════╪════════════╪═════════════════════╡
│ 2001-05-07 01:00 ┆ 2001-05-07 01:00:00 ┆ 2001-05-07 ┆ 2001-05-07 01:00:00 │
│ 2001-05-07 01:15 ┆ 2001-05-07 01:15:00 ┆ 2001-05-07 ┆ 2001-05-07 01:15:00 │
│ 2001-05-07 01:30 ┆ 2001-05-07 01:30:00 ┆ 2001-05-07 ┆ 2001-05-07 01:30:00 │
│ 2001-05-07 01:45 ┆ 2001-05-07 01:45:00 ┆ 2001-05-07 ┆ 2001-05-07 01:45:00 │
│ 2001-05-07 02:00 ┆ 2001-05-07 02:00:00 ┆ 2001-05-07 ┆ 2001-05-07 02:00:00 │
│ …                ┆ …                   ┆ …          ┆ …                   │
│ 2020-09-28 08:30 ┆ 2020-09-28 08:30:00 ┆ 2020-09-28 ┆ 2020-09-28 08:30:00 │
│ 2020-09-28 08:45 ┆ 2020-09-28 08:45:00 ┆ 2

### Combining Columns to Create