In [1]:
import requests
import numpy as np
import pandas as pd
import os

import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import cm as cm

import seaborn as sns 
import datetime
from datetime import datetime

# 1 Data Gathering - SolarView

https://github.com/social-energy-atlas/solarview-data

## 1.1 Installations

## 1.4 Zillow Valuation Information

In [2]:
# Read the county-level Zillow valuation table, then preview its first rows.
zillow = pd.read_csv(
    'data/sea-county-zillow.csv',
    encoding='utf-8',
)
zillow.head(n=5)

Unnamed: 0,fips,med-zhvi,med-zrvi
0,13001,,
1,13003,,
2,13005,,
3,13007,,
4,13009,$66.08,$0.62


# 2 Data Assessment
## 2.1 Installations

## 2.4 Zillow Valuation Information

In [3]:
# Summarize column dtypes and non-null counts of the raw Zillow table.
zillow.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 3 columns):
fips        159 non-null int64
med-zhvi    84 non-null object
med-zrvi    117 non-null object
dtypes: int64(1), object(2)
memory usage: 3.8+ KB


In [4]:
# Preview the first five rows to inspect how the valuation columns are formatted.
zillow.head(n=5)

Unnamed: 0,fips,med-zhvi,med-zrvi
0,13001,,
1,13003,,
2,13005,,
3,13007,,
4,13009,$66.08,$0.62


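- med-zhvi and med-zrvi are stored as strings with a leading `$` (dtype `object`); both should be floats
- med-zhvi and med-zrvi both contain NaN values (84 and 117 non-null values out of 159 rows, respectively)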

In [5]:
# Count missing values per column (isna is the canonical name for isnull).
zillow.isna().sum()

fips         0
med-zhvi    75
med-zrvi    42
dtype: int64

In [6]:
# Count fully duplicated rows; the first occurrence of each row is not flagged.
zillow.duplicated(keep='first').sum()

0

# 3 Data Cleaning

## 3.1 Copy All Dataframes

### 3.1.1 Define
Copy all dataframes for cleaning purposes.

#### 3.1.1.2 Code

In [14]:
# Work on two independent deep copies so the raw `zillow` frame stays untouched:
# one is later reduced to the med-zhvi column, the other to the med-zrvi column.
zillow_clean_zhvi, zillow_clean_zrvi = zillow.copy(deep=True), zillow.copy(deep=True)


#### 3.1.1.3 Test

In [15]:
# Check that the zhvi working copy has the same columns and non-null counts as the raw table.
zillow_clean_zhvi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 3 columns):
fips        159 non-null int64
med-zhvi    84 non-null object
med-zrvi    117 non-null object
dtypes: int64(1), object(2)
memory usage: 3.8+ KB


In [16]:
# Check that the zrvi working copy has the same columns and non-null counts as the raw table.
zillow_clean_zrvi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 3 columns):
fips        159 non-null int64
med-zhvi    84 non-null object
med-zrvi    117 non-null object
dtypes: int64(1), object(2)
memory usage: 3.8+ KB


## 3.2 Column Naming
### 3.2.1 Installations
#### 3.2.1.1 Define
Rename the columns, replacing hyphens with underscores so that later code runs without errors, and correct any spelling errors.

#### 3.2.1.2 Code

In [17]:
# Swap the hyphenated valuation column names for underscore versions.
zillow_clean_zhvi = zillow_clean_zhvi.rename(
    {"med-zhvi": "med_zhvi", "med-zrvi": "med_zrvi"},
    axis="columns",
)

In [18]:
# Rename the hyphenated columns on the zrvi working copy itself. Deriving this
# frame from zillow_clean_zhvi only works while that frame still holds both
# valuation columns; once med_zrvi has been dropped from it, re-running this
# cell would leave zillow_clean_zrvi without the column it is meant to keep.
zillow_clean_zrvi = zillow_clean_zrvi.rename(columns={"med-zhvi": "med_zhvi",
                                                      "med-zrvi": "med_zrvi"})

#### 3.2.1.3 Test

In [19]:
# Confirm the renamed columns on the zhvi frame (first five rows).
zillow_clean_zhvi.head(n=5)

Unnamed: 0,fips,med_zhvi,med_zrvi
0,13001,,
1,13003,,
2,13005,,
3,13007,,
4,13009,$66.08,$0.62


In [21]:
# Confirm the renamed columns on the zrvi frame (first five rows).
zillow_clean_zrvi.head(n=5)

Unnamed: 0,fips,med_zhvi,med_zrvi
0,13001,,
1,13003,,
2,13005,,
3,13007,,
4,13009,$66.08,$0.62


Drop the `med_zrvi` column from `zillow_clean_zhvi`.

In [22]:
# Remove med_zrvi so this frame carries only fips and med_zhvi. Reassigning
# (instead of inplace=True) and ignoring an already-missing column makes the
# cell safe to re-run; the in-place version raised KeyError on a second run.
zillow_clean_zhvi = zillow_clean_zhvi.drop(columns=["med_zrvi"], errors="ignore")

In [23]:
# Verify that only fips and med_zhvi remain.
zillow_clean_zhvi.head(n=5)

Unnamed: 0,fips,med_zhvi
0,13001,
1,13003,
2,13005,
3,13007,
4,13009,$66.08


Drop the `med_zhvi` column from `zillow_clean_zrvi`.

In [24]:
# Remove med_zhvi so this frame carries only fips and med_zrvi. Reassigning
# (instead of inplace=True) and ignoring an already-missing column makes the
# cell safe to re-run; the in-place version raised KeyError on a second run.
zillow_clean_zrvi = zillow_clean_zrvi.drop(columns=["med_zhvi"], errors="ignore")

In [25]:
# Verify that only fips and med_zrvi remain (first four rows).
zillow_clean_zrvi.head(n=4)

Unnamed: 0,fips,med_zrvi
0,13001,
1,13003,
2,13005,
3,13007,


### 3.2.2 Null Values
#### 3.2.2.1 Define
Drop observations (rows) with a null `med_zhvi` value from `zillow_clean_zhvi`.

#### 3.2.2.2 Code

In [27]:
# Number of rows with a missing med_zhvi before cleaning.
zillow_clean_zhvi['med_zhvi'].isna().sum()

75

In [28]:
# Remove every row that has a missing value in any column.
# NOTE(review): the remaining med_zhvi values are still '$'-prefixed strings;
# the assessment says they should be floats — confirm where that conversion happens.
zillow_clean_zhvi = zillow_clean_zhvi.dropna(axis="index", how="any")

#### 3.2.2.3 Test

In [30]:
# After dropping, every column should report zero missing values.
zillow_clean_zhvi.isna().sum()

fips        0
med_zhvi    0
dtype: int64

### 3.2.2 Null Values
#### 3.2.2.1 Define
Drop observations (rows) with a null `med_zrvi` value from `zillow_clean_zrvi`.

#### 3.2.2.2 Code

In [31]:
# Number of rows with a missing med_zrvi before cleaning.
zillow_clean_zrvi['med_zrvi'].isna().sum()

42

In [32]:
# Remove every row that has a missing value in any column.
# NOTE(review): the remaining med_zrvi values are still '$'-prefixed strings;
# the assessment says they should be floats — confirm where that conversion happens.
zillow_clean_zrvi = zillow_clean_zrvi.dropna(axis="index", how="any")

#### 3.2.2.3 Test

In [33]:
# After dropping, every column should report zero missing values.
zillow_clean_zrvi.isna().sum()

fips        0
med_zrvi    0
dtype: int64

## Store Data

In [34]:
# Persist the cleaned med_zhvi table without the index column. Create the
# output folder first so the write does not fail when clean_data/ is missing
# (for example on a fresh checkout).
os.makedirs('clean_data', exist_ok=True)
zillow_clean_zhvi.to_csv('clean_data/zillow_clean_zhvi.csv', encoding='utf-8', index=False)

In [35]:
# Persist the cleaned med_zrvi table without the index column. Create the
# output folder first so the write does not fail when clean_data/ is missing
# (for example on a fresh checkout).
os.makedirs('clean_data', exist_ok=True)
zillow_clean_zrvi.to_csv('clean_data/zillow_clean_zrvi.csv', encoding='utf-8', index=False)