In [1]:
# Import Dependencies

import pandas as pd
import numpy as np
import os

In [2]:
# Read each source CSV from the "Resources" folder into its own dataframe.
# Files are loaded in the order listed and unpacked positionally.
contact_df, ratio_df, SAT_df, exp_df = (
    pd.read_csv(os.path.join("Resources", csv_name))
    for csv_name in (
        "contact.csv",
        "studentratio.csv",
        "SAT.csv",
        "expenditure.csv",
    )
)

#### Clean contact dataframe

In [3]:
# Keep only the district code and grade span columns
contact_df = contact_df.loc[:, ["DISTRICT_CODE", "GRADESPAN"]]

# Sanity checks. Only the final expression of a cell is rendered, so the
# intermediate checks below are evaluated but not displayed.

# Count of missing values per column
contact_df.isna().sum()

# Distinct grade span values, to spot malformed entries
contact_df["GRADESPAN"].unique()

# Rows whose district code repeats an earlier row
contact_df.loc[contact_df.duplicated(subset=["DISTRICT_CODE"])]

# Column dtypes (this is the cell's displayed output)
contact_df.dtypes

DISTRICT_CODE     int64
GRADESPAN        object
dtype: object

#### Clean ratio dataframe

In [4]:
# Keep only the district identifier and the district-level student:teacher ratio
ratio_df = ratio_df[["DistrictCode", "Student_Teacher_District"]]

# Rename columns; "DISTRICT_CODE" matches the column name used in contact_df
ratio_df = ratio_df.rename(columns={"DistrictCode": "DISTRICT_CODE", "Student_Teacher_District": "RATIO"})

# Verify no missing data
ratio_df.isnull().sum()

# Verify no duplicate district codes
ratio_df[ratio_df.duplicated(["DISTRICT_CODE"])]

# Verify no incorrect data in ratio.
# This must inspect ratio_df: contact_df has no RATIO column, so looking it up
# there raises AttributeError and stops the cell.
ratio_df.RATIO.unique()

# Remove the row whose district code is the literal string 'State'
# (presumably a statewide summary row -- confirm against the source file)
ratio_df = ratio_df[ratio_df.DISTRICT_CODE != 'State']

# Fill "N" values with "Not Available" (applied to every column of the frame)
ratio_df = ratio_df.replace("N", "Not Available")

# Cast district code as integer; this runs after the non-numeric 'State'
# label has been filtered out above
ratio_df["DISTRICT_CODE"] = ratio_df["DISTRICT_CODE"].astype('int64')

# Verify data are of correct type (this is the cell's displayed output)
ratio_df.dtypes

array(['13:1', '11:1', '12:1', '6:1', '8:1', '10:1', '14:1', '7:1', '9:1',
       '3:1', '16:1', 'N', '4:1', '20:1', '18:1', '5:1', '1:1', '15:1',
       '139:1', '2:1', '17:1', '32:1', '19:1'], dtype=object)

array(['13:1', '11:1', '12:1', '6:1', '8:1', '10:1', '14:1', '7:1', '9:1',
       '3:1', '16:1', 'Not Available', '4:1', '20:1', '18:1', '5:1',
       '1:1', '15:1', '139:1', '2:1', '17:1', '32:1', '19:1'],
      dtype=object)