# Exporting Data

#### Loading Libraries

In [1]:
# Data Manipulation
import pandas as pd
# Numerical Computing
import numpy as np
# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

#### Loading Data

In [2]:
# Tab-separated text file hosted in the mattharrison/datasets repository.
url = ('https://github.com/mattharrison/datasets/raw/master'
       '/data/dirtydevil.txt')
# Skip lines 0-33 and line 35 of the file; line 34 is the first line kept,
# so read_csv uses it as the header row.
df = pd.read_csv(
    url,
    sep='\t',
    skiprows=lambda line_no: line_no < 34 or line_no == 35,
)

  df = pd.read_csv(url, skiprows=lambda num: num <34 or num == 35,


In [3]:
def to_denver_time(df_, time_col, tz_col):
    """Return ``time_col`` as timezone-aware datetimes in America/Denver.

    Rows are grouped by the zone name found in ``tz_col`` (after mapping
    'MDT' to 'MST7MDT'). Each group is parsed with ``pd.to_datetime``,
    localized to its group's zone, and converted to 'America/Denver'.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Frame containing both ``time_col`` and ``tz_col``. It is not modified.
    time_col : str
        Name of the column holding values parseable by ``pd.to_datetime``.
    tz_col : str
        Name of the column holding the time-zone name for each row.

    Returns
    -------
    pandas.Series
        The converted ``time_col`` values, produced by ``groupby.transform``.
    """
    def _localize(group):
        # Inside transform, ``group.name`` is the group key, i.e. the zone name.
        parsed = pd.to_datetime(group)
        # ambiguous=True marks ambiguous wall-clock times as DST.
        localized = parsed.dt.tz_localize(group.name, ambiguous=True)
        return localized.dt.tz_convert('America/Denver')

    # NOTE(review): 'MDT' is presumably not accepted as a zone name by
    # tz_localize, hence the swap to 'MST7MDT' - confirm.
    zone_names = df_[tz_col].replace('MDT', 'MST7MDT')
    with_zone_names = df_.assign(**{tz_col: zone_names})
    return with_zone_names.groupby(tz_col)[time_col].transform(_localize)

In [4]:
def tweak_river(df_):
    """Return a tidied copy of the raw river frame.

    The 'datetime' column is replaced by the result of
    ``to_denver_time(df_, 'datetime', 'tz_cd')``, the two coded measurement
    columns are renamed to 'cfs' and 'gage_height', and 'datetime' becomes
    the index.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Raw frame with 'datetime', 'tz_cd', '144166_00060' and
        '144167_00065' columns.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by 'datetime'; ``df_`` itself is left unchanged.
    """
    column_names = {'144166_00060': 'cfs',
                    '144167_00065': 'gage_height'}
    denver_times = to_denver_time(df_, 'datetime', 'tz_cd')
    renamed = df_.assign(datetime=denver_times).rename(columns=column_names)
    return renamed.set_index('datetime')

In [5]:
# Build the cleaned frame (America/Denver datetime index, renamed measurement
# columns) and display it.
dd = tweak_river(df)
dd

Unnamed: 0_level_0,agency_cd,site_no,tz_cd,cfs,144166_00060_cd,gage_height,144167_00065_cd
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-05-07 01:00:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:15:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:30:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:45:00-06:00,USGS,9333500,MDT,70.00,A:[91],,
2001-05-07 02:00:00-06:00,USGS,9333500,MDT,70.00,A:[91],,
...,...,...,...,...,...,...,...
2020-09-28 08:30:00-06:00,USGS,9333500,MDT,9.53,P,6.16,P
2020-09-28 08:45:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P
2020-09-28 09:00:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P
2020-09-28 09:15:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P


### Reading & Writing

#### Creating CSV Files

In [7]:
# Write the whole frame, index included, to a CSV file on disk.
# NOTE(review): absolute, user-specific path - consider a configurable data
# directory so the notebook runs on other machines.
dd.to_csv('/Users/isisromero/desktop/dd.csv')

In [8]:
# Called without a path, to_csv returns the CSV text instead of writing a
# file; print it to preview the first five rows.
print(dd.head(5).to_csv())

datetime,agency_cd,site_no,tz_cd,cfs,144166_00060_cd,gage_height,144167_00065_cd
2001-05-07 01:00:00-06:00,USGS,9333500,MDT,71.0,A:[91],,
2001-05-07 01:15:00-06:00,USGS,9333500,MDT,71.0,A:[91],,
2001-05-07 01:30:00-06:00,USGS,9333500,MDT,71.0,A:[91],,
2001-05-07 01:45:00-06:00,USGS,9333500,MDT,70.0,A:[91],,
2001-05-07 02:00:00-06:00,USGS,9333500,MDT,70.0,A:[91],,



In [9]:
# Read the CSV back, using its 'datetime' column as the index.
# NOTE(review): no parse_dates is passed, so the index is presumably read as
# plain strings rather than timezone-aware datetimes - confirm.
dd2 = pd.read_csv('/Users/isisromero/desktop/dd.csv', index_col='datetime')

  dd2 = pd.read_csv('/Users/isisromero/desktop/dd.csv', index_col='datetime')


#### Exporting Excel

In [11]:
# # Raises an error: to_excel cannot write timezone-aware datetimes
# dd.to_excel('/Users/isisromero/desktop/dd.xlsx')

In [14]:
# Strip the time zone before exporting: tz_convert(None) converts the values
# to UTC and drops the tz information, leaving naive datetimes.
(dd
 .reset_index()
 .assign(datetime=lambda frame: frame['datetime'].dt.tz_convert(None))
 .set_index('datetime')
 .to_excel('/Users/isisromero/desktop/dd2.xlsx'))

In [16]:
# writer = pd.ExcelWriter('/tmp/dd2.xlsx')
# dd2 = (dd    
#  .reset_index()
#  .assign(datetime=lambda df_: df_.datetime.dt.tz_convert(tz=None))
#  .set_index('datetime')
# )
# (dd2      # doctest: +SKIP
#  .loc['2010':'2010-12-31']
#  .to_excel(writer, sheet_name='2010')
# )
# (dd2     # doctest: +SKIP
#  .loc['2011':'2011-12-31'] 
#  .to_excel(writer, sheet_name='2011')
# )
# writer.close()  # ExcelWriter.save() was removed in pandas 2.0; close() writes the file

#### Feather

In [17]:
# Try to write the frame, datetime index and all, to a Feather file.
# NOTE(review): some pandas versions reject a non-default index here, which
# would explain why the following cell resets the index first - confirm this
# cell runs cleanly on a fresh kernel.
dd.to_feather('/Users/isisromero/desktop/dd.fea')

In [19]:
# Move the datetime index into a regular column, then write the Feather file.
dd.reset_index().to_feather('/Users/isisromero/desktop/dd.fea')

In [20]:
# Load the Feather file, restore 'datetime' as the index, and check that the
# result is identical to the original frame.
dd2 = pd.read_feather('/Users/isisromero/desktop/dd.fea')
dd2.set_index('datetime').equals(dd)

True

#### SQL

In [21]:
import sqlite3
# Write the frame to table 'dd' in a local SQLite file, replacing any
# existing table of that name.
con = sqlite3.connect('dd.db')
try:
    rows_written = dd.to_sql('dd', con, if_exists='replace')
finally:
    # Close the connection even if the write fails, so the database handle
    # is not leaked for the rest of the kernel session.
    con.close()
# Display the value returned by to_sql as the cell output.
rows_written

539305

In [32]:
import sqlalchemy as sa
# Read table 'dd' back through SQLAlchemy, using 'datetime' as the index.
eng = sa.create_engine('sqlite:///dd.db')
# The context manager closes the connection once the table has been loaded,
# instead of leaving it open for the rest of the session.
with eng.connect() as sa_con:
    dd2 = pd.read_sql('dd', sa_con, index_col='datetime')
# Check whether the SQL round trip reproduces the original frame exactly.
dd2.equals(dd)

False

In [33]:
# Display the frame that was read back from the SQLite table.
dd2

Unnamed: 0_level_0,agency_cd,site_no,tz_cd,cfs,144166_00060_cd,gage_height,144167_00065_cd
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-05-07 01:00:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:15:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:30:00-06:00,USGS,9333500,MDT,71.00,A:[91],,
2001-05-07 01:45:00-06:00,USGS,9333500,MDT,70.00,A:[91],,
2001-05-07 02:00:00-06:00,USGS,9333500,MDT,70.00,A:[91],,
...,...,...,...,...,...,...,...
2020-09-28 08:30:00-06:00,USGS,9333500,MDT,9.53,P,6.16,P
2020-09-28 08:45:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P
2020-09-28 09:00:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P
2020-09-28 09:15:00-06:00,USGS,9333500,MDT,9.20,P,6.15,P


In [34]:
# Convert the round-tripped datetimes to America/Denver, restore the index,
# and compare against the original frame.
(dd2
 .reset_index()
 .assign(datetime=lambda frame: frame['datetime'].dt.tz_convert('America/Denver'))
 .set_index('datetime')
 .equals(dd))

False

#### JSON

In [36]:
# Convert the frame to a nested dict: {column -> {index label -> value}}.
obj = dd.to_dict()

In [37]:
# Rebuild a frame from the dict and check that it matches the original.
dd2 = pd.DataFrame.from_dict(obj)
dd.equals(dd2)

True

In [38]:
# Serialize the frame to JSON on disk.
# NOTE(review): the '.gz' suffix presumably makes pandas gzip-compress the
# output (compression is inferred from the file extension by default) -
# confirm this is intended; the commented-out read below targets 'dd.json'.
dd.to_json('/Users/isisromero/desktop/dd.json.gz')

In [40]:
# dd2 = pd.read_json('/Users/isisromero/desktop/dd.json')
# dd2

In [42]:
# dd2.equals(dd)

In [44]:
# dd3 = (dd2
#  .reset_index()
#  .rename(columns={'index':'datetime'})
#  .assign(datetime=lambda df_: df_.datetime.dt.tz_localize(tz='UTC')
#          .dt.tz_convert('America/Denver'))
#  .set_index('datetime')
# )

In [45]:
# dd3

In [46]:
# dd3.equals(dd)

In [47]:
# dd3.round(3).equals(dd)

In [48]:
# dd.head()

In [49]:
# dd.head().to_json()[:60]