## {{cookiecutter.project_name}}

{{cookiecutter.description}}

### Data Sources
- file1 : Description of where this file came from
- sql1 : Description of what this script is doing

### Changes
- {% now 'America/New_York', '%m-%d-%Y' %} : Started project

In [None]:
import pandas as pd
# Limit DataFrame display to 6 rows so cell output stays short.
pd.set_option('display.max_rows', 6)
import numpy as np
# Project-local helpers; PostgreSQL_Cnxn.connect() is called below to open the database connection.
from jiffy_utilities import PostgreSQL_Cnxn
from jiffy_utilities import jiffy_utils as ju
# Plotting libraries.
import matplotlib.pyplot as plt
import seaborn as sns

### File Locations
##### If using flat files -- otherwise use SQL scripts

In [None]:
# Timestamp of this run; its date is embedded in the output file name.
today = pd.Timestamp.today()
date_stamp = today.strftime('%Y%m%d')

# Raw input file to load.
in_file = r'./data/1. raw/FILE1'

# Dated pickle path the cleaned data is written to at the end of the notebook.
summary_file = r'./data/3. processed/cleaned_{0}.pkl'.format(date_stamp)

In [None]:
# Load the raw flat file into a DataFrame.
df_flat = pd.read_csv(filepath_or_buffer=in_file)
# Leave the frame as the cell's last expression so the notebook displays it.
df_flat

### SQL Script Locations

In [None]:
# Timestamp of this run; its date is embedded in the output file name.
today = pd.Timestamp.today()
# Open the database connection; the result is unpacked as (connection, cursor).
conn, curs = PostgreSQL_Cnxn.connect()
# Location of the SQL script to run.
sql1 = r'./sql_scripts/SQL1'
# Dated pickle path for the cleaned output.
# (Fixed: the closing parenthesis of .format(...) was missing, which made this cell a SyntaxError.)
summary_file = r'./data/3. processed/cleaned_{0}.pkl'.format(today.strftime('%Y%m%d'))

In [None]:
# `sql1` holds the *path* to the SQL script, but pd.read_sql expects the SQL
# text itself (or a table name), so read the script's contents first.
with open(sql1, encoding='utf-8') as sql_script:
    query = sql_script.read()
df_sql = pd.read_sql(query, conn)
# Leave the frame as the cell's last expression so the notebook displays it.
df_sql

### Column Cleanup (likely only necessary on flat file load)

- Remove all leading and trailing spaces from the column names
- Rename the columns for consistency.

In [None]:
# Strip leading/trailing whitespace from every column label.
# Reference: https://stackoverflow.com/questions/30763351/removing-space-in-dataframe-python
stripped_labels = [label.strip() for label in df_flat.columns]
df_flat.columns = stripped_labels
# Print the column summary to confirm the cleaned names.
df_flat.info()

In [None]:
# Mapping of existing column name -> new column name; add one entry per column.
cols_to_rename = {
    'col1': 'New_Name',
}
# Rename in place so df_flat itself carries the new column names.
df_flat.rename(cols_to_rename, axis='columns', inplace=True)

### Clean Up Data Types

In [None]:
# Print df_sql's column summary (names, non-null counts, dtypes) to spot columns that need type conversion.
df_sql.info()

In [None]:
# The date column will likely need converting to datetime format.
datetime_col = 'date'
parsed_dates = pd.to_datetime(df_sql[datetime_col])
df_sql[datetime_col] = parsed_dates
# Equivalent conversion when working from the flat file instead:
# df_flat[datetime_col] = pd.to_datetime(df_flat[datetime_col])

### Data Manipulation

### Save output file into processed directory

Save the cleaned DataFrame to a file in the processed directory. It will be read back in later for further analysis.

Other options besides pickle include:
- feather
- msgpack (deprecated in pandas 0.25 and removed in pandas 1.0; not available in current versions)
- parquet

In [None]:
# Write the cleaned DataFrame to the dated pickle path held in `summary_file`.
# NOTE(review): `df` is not assigned in any cell shown above (only `df_flat` and
# `df_sql` are) — presumably it is meant to be created in the Data Manipulation
# section; confirm it exists before running this cell.
df.to_pickle(summary_file)