In [40]:
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import linregress


# Read the Duke Energy 2010 dataset (customer sample fields plus survey items)
# NOTE(review): the head() preview rendered below includes customer mailing
# addresses - consider clearing cell outputs before sharing this notebook.

attitudes = pd.read_csv("Data/duke_energy_2010.csv")
attitudes.head()

Unnamed: 0,zip,SAMPLE_ID,SAMPLE_ENERGY,SAMPLE_USAGE,SAMPLE_REVENUE,SAMPLE_HHAGE,SAMPLE_INC,SAMPLE_REGION,SAMPLE_ACCOUNT_ACTIVATION_DATE,SAMPLE_MAILING_ADDRESS,...,ccreal,scarce,nuclear,solar,wind,muchasican,futurepower,phev,Q7,_merge
0,27603,10100106,17765,1480.420044,1899.530029,4,6,NORTHERN REGION,20070731,180 TRAVEL LITE DR,...,2.0,5.0,8.0,6.0,6.0,10.0,3.0,1.0,5,matched (3)
1,27807,10100111,17818,1484.829956,1919.380005,6,4,NORTHERN REGION,19910523,7554 SQUIRREL DEN RD,...,9.0,1.0,10.0,10.0,4.0,8.0,9.0,9.0,5,matched (3)
2,27540,10100131,18013,1501.079956,1950.459961,3,8,NORTHERN REGION,20040622,729 LITTLE LEAF CT,...,1.0,8.0,5.0,8.0,8.0,4.0,8.0,5.0,6,matched (3)
3,27545,10100146,18062,1505.170044,1819.969971,3,5,NORTHERN REGION,20000417,406 PINE RUN,...,10.0,9.0,4.0,10.0,8.0,8.0,9.0,4.0,3,matched (3)
4,27616,10100171,18333,1527.75,1695.01001,4,3,NORTHERN REGION,19920313,3416 TUNAS ST,...,10.0,10.0,10.0,10.0,7.0,6.0,7.0,1.0,4,matched (3)


In [41]:
# Creating scales based on shared constructs: "greenness", "first adopter", "conserver"
#
# Each scale is the row-wise sum of its survey items. The sums are computed
# column-wise in one pass per scale instead of looping over rows with iterrows().

# Survey items that make up each scale.
greenness_items = ["conscientiousrecycle", "ccreal", "reduceenergy", "willingchanges"]
conserver_items = ["muchasican", "realeffort", "payattn"]
firstadopter_items = ["firstone", "phev", "mycellphone"]

# skipna=False matters: DataFrame.sum skips NaN by default, which would score a
# respondent with missing items as if those items were 0. With skipna=False a
# missing item makes the whole scale NaN, exactly like adding the items with `+`.
# .tolist() keeps the three names as plain Python lists, one entry per row.
greenness = attitudes[greenness_items].sum(axis=1, skipna=False).tolist()

conserver = attitudes[conserver_items].sum(axis=1, skipna=False).tolist()

firstadopter = attitudes[firstadopter_items].sum(axis=1, skipna=False).tolist()


In [42]:
# Turn each summed scale into a mean item score by dividing by the number of
# items in that scale (4 for greenness, 3 for conserver and firstadopter).
#
# A Series built from a list gets a fresh 0..n-1 index, and column assignment
# aligns on index labels. Pinning the Series to attitudes.index makes the
# assignment positional regardless of the frame's index, and raises immediately
# if a list does not have one entry per row instead of silently filling NaN.
attitudes["greenness"] = pd.Series(greenness, index=attitudes.index) / 4
attitudes["conserver"] = pd.Series(conserver, index=attitudes.index) / 3
attitudes["firstadopter"] = pd.Series(firstadopter, index=attitudes.index) / 3

attitudes.head(20)


Unnamed: 0,zip,SAMPLE_ID,SAMPLE_ENERGY,SAMPLE_USAGE,SAMPLE_REVENUE,SAMPLE_HHAGE,SAMPLE_INC,SAMPLE_REGION,SAMPLE_ACCOUNT_ACTIVATION_DATE,SAMPLE_MAILING_ADDRESS,...,solar,wind,muchasican,futurepower,phev,Q7,_merge,greenness,conserver,firstadopter
0,27603,10100106,17765,1480.420044,1899.530029,4,6,NORTHERN REGION,20070731,180 TRAVEL LITE DR,...,6.0,6.0,10.0,3.0,1.0,5,matched (3),3.75,8.333333,5.333333
1,27807,10100111,17818,1484.829956,1919.380005,6,4,NORTHERN REGION,19910523,7554 SQUIRREL DEN RD,...,10.0,4.0,8.0,9.0,9.0,5,matched (3),7.0,7.333333,6.333333
2,27540,10100131,18013,1501.079956,1950.459961,3,8,NORTHERN REGION,20040622,729 LITTLE LEAF CT,...,8.0,8.0,4.0,8.0,5.0,6,matched (3),5.5,6.0,7.666667
3,27545,10100146,18062,1505.170044,1819.969971,3,5,NORTHERN REGION,20000417,406 PINE RUN,...,10.0,8.0,8.0,9.0,4.0,3,matched (3),8.0,8.666667,4.333333
4,27616,10100171,18333,1527.75,1695.01001,4,3,NORTHERN REGION,19920313,3416 TUNAS ST,...,10.0,7.0,6.0,7.0,1.0,4,matched (3),8.5,7.333333,3.666667
5,27609,10100211,18734,1561.170044,1853.449951,5,9,NORTHERN REGION,19920814,2109 TREVERTON PL,...,5.0,3.0,10.0,9.0,5.0,5,matched (3),8.75,9.666667,6.333333
6,27612,10100241,19144,1595.329956,1959.550049,4,0,NORTHERN REGION,19890727,2421 BASIL DR,...,5.0,5.0,6.0,9.0,10.0,6,matched (3),8.5,8.666667,5.666667
7,27573,10100261,19334,1611.170044,2082.909912,5,7,NORTHERN REGION,20071011,510 W GORDON ST,...,10.0,10.0,8.0,8.0,8.0,5,matched (3),7.75,7.333333,5.333333
8,27603,10100276,19417,1618.079956,2001.609985,5,7,NORTHERN REGION,19871218,537 MAPLE LN,...,9.0,9.0,8.0,8.0,7.0,5,matched (3),8.5,8.666667,7.333333
9,27606,10100291,19498,1624.829956,1966.47998,5,6,NORTHERN REGION,19770610,4609 WOODSIDE CT,...,8.0,8.0,7.0,7.0,2.0,5,matched (3),6.5,7.0,3.0


In [43]:
# Remove missing values

## First diagnose missingness - print the number of non-missing observations for each column
# DataFrame.count() counts only non-missing cells, so despite its name
# `total_rows` holds one count per column rather than a single row total
total_rows = attitudes.count()
print(total_rows)


zip                               4705
SAMPLE_ID                         4705
SAMPLE_ENERGY                     4705
SAMPLE_USAGE                      4705
SAMPLE_REVENUE                    4705
SAMPLE_HHAGE                      4705
SAMPLE_INC                        4705
SAMPLE_REGION                     4705
SAMPLE_ACCOUNT_ACTIVATION_DATE    4705
SAMPLE_MAILING_ADDRESS            4705
SAMPLE_MAILING_CITY               4705
SAMPLE_MAILING_STATE              4705
SAMPLE_MAILING_ZIP                4705
SAMPLE_PRIMARY_PHONE              4705
SAMPLE_SECONDARY_PHONE            4705
SAMPLE_PREMISE_ADDRESS            4705
SAMPLE_PREMISE_CITY               4705
SAMPLE_PREMISE_STATE              4705
SAMPLE_PREMISE_ZIP                4705
SAMPLE_PREMISE_COUNTY             4705
SAMPLE_MONTHS                     4705
SAMPLE_DWELLING                   4705
SAMPLE_FAMCOMP                    4705
SAMPLE_HHADULTS                   4705
SAMPLE_HHCHILDS                   4705
SAMPLE_HHPEOPLE          

In [44]:
# Keep only rows where all three scale scores are present (complete cases);
# dropna returns a new frame, so `attitudes` itself is left unchanged.
# In the recorded run this reduces the per-column counts from 4705 to 4069.
attitudes_clean= attitudes.dropna(subset=['greenness', 'conserver', 'firstadopter'])
# Re-run the per-column non-missing counts on the filtered frame
total_rows = attitudes_clean.count()
print(total_rows)

zip                               4069
SAMPLE_ID                         4069
SAMPLE_ENERGY                     4069
SAMPLE_USAGE                      4069
SAMPLE_REVENUE                    4069
SAMPLE_HHAGE                      4069
SAMPLE_INC                        4069
SAMPLE_REGION                     4069
SAMPLE_ACCOUNT_ACTIVATION_DATE    4069
SAMPLE_MAILING_ADDRESS            4069
SAMPLE_MAILING_CITY               4069
SAMPLE_MAILING_STATE              4069
SAMPLE_MAILING_ZIP                4069
SAMPLE_PRIMARY_PHONE              4069
SAMPLE_SECONDARY_PHONE            4069
SAMPLE_PREMISE_ADDRESS            4069
SAMPLE_PREMISE_CITY               4069
SAMPLE_PREMISE_STATE              4069
SAMPLE_PREMISE_ZIP                4069
SAMPLE_PREMISE_COUNTY             4069
SAMPLE_MONTHS                     4069
SAMPLE_DWELLING                   4069
SAMPLE_FAMCOMP                    4069
SAMPLE_HHADULTS                   4069
SAMPLE_HHCHILDS                   4069
SAMPLE_HHPEOPLE          

In [48]:
# Save the complete-case frame; index=False leaves the row index out of the file
attitudes_clean.to_csv("Data/attitudes_clean2.csv", index=False)


In [None]:
# #Exploratory graphs
# NOTE(review): disabled draft, kept commented out. Corrected in place so it can
# be re-enabled:
#   - x/y are selected as single columns (Series); linregress expects 1-D inputs
#   - linregress returns five values, so all five are unpacked
#   - the scatter plots y_axis (the draft referenced an undefined name, `fake`)
#   - axis labels follow the plotted columns (x = highenergyproblem, y = ccreal);
#     the label wording is inferred from the column names - confirm before use
# TODO: missing responses in either item would presumably make the fit NaN -
# drop those rows first (verify).

# x_axis = attitudes['highenergyproblem']
# y_axis = attitudes['ccreal']

# plt.title("RELATIONSHIP BETWEEN CONCERN ABOUT ENERGY SUPPLY AND CLIMATE CHANGE")
# plt.xlabel("Energy Supply Concern")
# plt.ylabel("Climate Concern")

# slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)
# fit = slope * x_axis + intercept

# fig, ax = plt.subplots()

# fig.suptitle("RELATIONSHIP BETWEEN ENERGY SUPPLY CONCERN AND CLIMATE", fontsize=12, fontweight="bold")

# ax.set_xlim(0, 10)
# ax.set_ylim(0, 10)

# ax.set_xlabel("Energy Supply Concern")
# ax.set_ylabel("Climate Concern")

# ax.plot(x_axis, y_axis, linewidth=0, marker='o')
# ax.plot(x_axis, fit, 'b--')

# plt.show()