In [27]:
import pandas as pd
from sqlalchemy import create_engine

### Store CSV into DataFrame

In [28]:
csv_file = "incidence_rate.csv"
depression_data_df = pd.read_csv(csv_file)
depression_data_df.head()

Unnamed: 0,location_name,sex_name,age_name,cause_name,val
0,Alabama,Male,30 to 34,Eating disorders,314.16
1,Alabama,Female,70 to 74,Anxiety disorders,330.4
2,Alabama,Male,75 to 79,Anxiety disorders,332.23
3,Alabama,Female,75 to 79,Anxiety disorders,274.17
4,Alabama,Male,80 plus,Anxiety disorders,221.39


Extract Depressive Disorder and Major depressive disorder from DataFrame

In [29]:
mdd = depression_data_df.loc[depression_data_df['cause_name'].isin(['Major depressive disorder', 'Depressive disorders'])]
mdd

Unnamed: 0,location_name,sex_name,age_name,cause_name,val
90,Alabama,Female,70 to 74,Major depressive disorder,3079.96
91,Alabama,Male,75 to 79,Major depressive disorder,1852.24
92,Alabama,Female,75 to 79,Major depressive disorder,2314.34
93,Alabama,Male,80 plus,Major depressive disorder,2407.98
94,Alabama,Female,80 plus,Major depressive disorder,2964.98
95,Alabama,Male,80 to 84,Major depressive disorder,2044.13
96,Alabama,Female,80 to 84,Major depressive disorder,2350.18
98,Alabama,Male,70 to 74,Major depressive disorder,2101.85
109,Alabama,Female,65 to 69,Major depressive disorder,3836.62
110,Alabama,Male,65 to 69,Major depressive disorder,2350.31


### Create new data with select columns

In [30]:
new_depression_data_df = mdd.groupby(["location_name"])["val"].sum()
pd.options.display.float_format = '{:,.2f}'.format
depression_analysis = pd.DataFrame(new_depression_data_df)
depression_analysis

Unnamed: 0_level_0,val
location_name,Unnamed: 1_level_1
Alabama,285260.92
Alaska,306732.42
Arizona,320307.3
Arkansas,334072.65
California,257912.41
Colorado,315464.74
Connecticut,247764.57
Delaware,289044.07
District of Columbia,236904.01
Florida,280887.82


In [31]:
import pandas as pd
import pathlib as path

In [32]:
import requests

response = requests.get('https://www.currentresults.com/Weather/US/average-annual-state-sunshine.php')
response.raise_for_status()
response.text



In [33]:
tables_df = pd.read_html(response.text)
df = pd.concat(tables_df)
df

Unnamed: 0,State,Place,% Sun,Total Hours,Clear Days
0,Alabama,Birmingham,58,2641,99
1,Alaska,Anchorage,41,2061,61
2,Arizona,Tucson,85,3806,193
3,Arkansas,Fort Smith,61,2771,123
4,California,San Diego,68,3055,146
5,Colorado,Grand Junction,71,3204,136
6,Connecticut,Hartford,56,2585,82
7,Delaware,Wilmington,–,–,97
8,Florida,Tampa,66,2927,101
9,Georgia,Macon,66,2986,112


In [34]:
data=df.rename(columns={'State': 'location_name'})
data

Unnamed: 0,location_name,Place,% Sun,Total Hours,Clear Days
0,Alabama,Birmingham,58,2641,99
1,Alaska,Anchorage,41,2061,61
2,Arizona,Tucson,85,3806,193
3,Arkansas,Fort Smith,61,2771,123
4,California,San Diego,68,3055,146
5,Colorado,Grand Junction,71,3204,136
6,Connecticut,Hartford,56,2585,82
7,Delaware,Wilmington,–,–,97
8,Florida,Tampa,66,2927,101
9,Georgia,Macon,66,2986,112


In [35]:
merge_table = pd.merge(data,depression_analysis, on="location_name")
merge_table

Unnamed: 0,location_name,Place,% Sun,Total Hours,Clear Days,val
0,Alabama,Birmingham,58,2641,99,285260.92
1,Alaska,Anchorage,41,2061,61,306732.42
2,Arizona,Tucson,85,3806,193,320307.3
3,Arkansas,Fort Smith,61,2771,123,334072.65
4,California,San Diego,68,3055,146,257912.41
5,Colorado,Grand Junction,71,3204,136,315464.74
6,Connecticut,Hartford,56,2585,82,247764.57
7,Delaware,Wilmington,–,–,97,289044.07
8,Florida,Tampa,66,2927,101,280887.82
9,Georgia,Macon,66,2986,112,276781.57


### Connect to local database

In [36]:
rds_connection_string = "<xinleibai>:<XBkerry1>@localhost:5432/customer_db"
engine = create_engine(f'sql://{rds_connection_string}')


NoSuchModuleError: Can't load plugin: sqlalchemy.dialects:sql

### Check for tables

In [None]:
engine.table_names()

### Use pandas to load csv converted DataFrame into database

In [None]:
new_customer_data_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)

### Use pandas to load json converted DataFrame into database

In [None]:
new_customer_location_df.to_sql(name='customer_location', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [None]:
pd.read_sql_query('select * from customer_name', con=engine).head()

### Confirm data has been added by querying the customer_location table

In [None]:
pd.read_sql_query('select * from customer_location', con=engine).head()