In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
import pymysql
from sqlalchemy import create_engine, inspect

### Store CSV into DataFrame

In [2]:
# Path to the raw breed-size CSV, relative to the notebook's working directory.
csv_file = "Resources/AKC Breed Info.csv"

# The file is read as ISO-8859-1 rather than the pandas default (UTF-8).
size_df = pd.read_csv(
    csv_file,
    encoding='iso-8859-1',
)

# Preview the first rows: breed name plus low/high height and weight columns.
size_df.head()

Unnamed: 0,Breed,height_low_inches,height_high_inches,weight_low_lbs,weight_high_lbs
0,Akita,26,28,80,120
1,Anatolian Sheepdog,27,29,100,150
2,Bernese Mountain Dog,23,27,85,110
3,Bloodhound,24,26,80,120
4,Borzoi,26,28,70,100


### Create a new DataFrame with selected columns

In [3]:
# Build the per-breed summary (average height / average weight).
#
# A previous `size_df['Breed'].replace("\x89ÛÒ", "")` line was dropped here:
# Series.replace with a plain string only matches whole cell values, so it never
# stripped the substring, and its result was never used. The breed-name cleanup
# is done on summary_size_df in the next cell.

measurement_cols = [
    'height_low_inches',
    'height_high_inches',
    'weight_low_lbs',
    'weight_high_lbs',
]

# errors='coerce' turns any value that cannot be parsed as a number into NaN
# instead of raising, so the averages below are NaN for such rows.
size_df[measurement_cols] = size_df[measurement_cols].apply(pd.to_numeric, errors='coerce')

# Midpoint of each low/high range. Plain addition is used on purpose: a NaN on
# either side propagates to the average rather than being silently skipped.
size_df['Average Height'] = (size_df['height_low_inches'] + size_df['height_high_inches']) / 2
size_df['Average Weight'] = (size_df['weight_low_lbs'] + size_df['weight_high_lbs']) / 2

# .copy() gives an independent frame so later edits to summary_size_df
# do not touch size_df.
summary_size_df = size_df[['Breed', 'Average Height', 'Average Weight']].copy()
summary_size_df.head()

Unnamed: 0,Breed,Average Height,Average Weight
0,Akita,27.0,100.0
1,Anatolian Sheepdog,28.0,125.0
2,Bernese Mountain Dog,25.0,97.5
3,Bloodhound,25.0,100.0
4,Borzoi,27.0,85.0


In [4]:
# Clean up breed names that contain the garbled separator '\x89ÛÒ '
# (presumably a dash that was mis-decoded when the CSV was read -- confirm
# against the raw file). The separator becomes "(" and a closing ")" is
# appended, e.g. "Name\x89ÛÒ Variant" -> "Name(Variant)".
#
# Vectorized with the .str accessor instead of iterating row by row;
# na=False treats missing breed names as "no match" rather than raising.
breed_names = summary_size_df['Breed']
needs_fix = breed_names.str.contains('ÛÒ', regex=False, na=False)

summary_size_df.loc[needs_fix, 'Breed'] = (
    breed_names[needs_fix].str.replace('\x89ÛÒ ', '(', regex=False) + ')'
)

### Connect to local database

In [5]:
# Register PyMySQL under the MySQLdb name so the plain 'mysql://' dialect
# in the connection URL below can find a driver.
pymysql.install_as_MySQLdb()

# Credentials are read from the environment so that no secret is stored in the
# notebook. ULTIMUTT_DB_PASSWORD is required (KeyError if unset); user and host
# fall back to the previous local defaults.
# NOTE(review): a password was previously committed in this cell -- rotate it.
db_user = os.environ.get("ULTIMUTT_DB_USER", "root")
db_password = os.environ["ULTIMUTT_DB_PASSWORD"]
db_host = os.environ.get("ULTIMUTT_DB_HOST", "127.0.0.1")

# quote_plus escapes characters such as '@', '/' or '*' that would otherwise
# be misread as URL delimiters when they appear in the password.
rds_connection_string = f"{db_user}:{quote_plus(db_password)}@{db_host}/ultimutt_db"
engine = create_engine(f'mysql://{rds_connection_string}')

### Check for tables

In [6]:
# List the tables visible through the engine. Engine.table_names() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0; the Inspector API is the
# supported replacement and returns the same list of table names.
inspect(engine).get_table_names()

['ultimutt_class', 'ultimutt_size']

### Use pandas to load the CSV-derived DataFrame into the database

In [7]:
# Insert the summary rows into the existing ultimutt_size table.
# if_exists='append' adds to whatever is already stored, so running this cell
# more than once inserts the same rows again. index=False keeps the DataFrame
# index from being written as a column.
summary_size_df.to_sql(
    name='ultimutt_size',
    con=engine,
    if_exists='append',
    index=False,
)

### Confirm data has been added by querying the ultimutt_size table

In [8]:
# Read the table back to confirm the insert, previewing the first rows only.
# NOTE(review): the stored output shows an Average Weight of 97.5 coming back
# as 98.0 -- the table's column type appears to round; confirm the schema.
size_rows = pd.read_sql_query('select * from ultimutt_size', con=engine)
size_rows.head()

Unnamed: 0,id,Breed,Average Height,Average Weight
0,1,Akita,27.0,100.0
1,2,Anatolian Sheepdog,28.0,125.0
2,3,Bernese Mountain Dog,25.0,98.0
3,4,Bloodhound,25.0,100.0
4,5,Borzoi,27.0,85.0
