In [33]:
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import linregress


# Load the Duke Energy 2010 data file into a DataFrame.
# The path is relative to the notebook's working directory.

attitudes = pd.read_csv(filepath_or_buffer="Data/duke_energy_2010.csv")

# Preview the first five rows.
attitudes.head(n=5)

Unnamed: 0,zip,SAMPLE_ID,SAMPLE_ENERGY,SAMPLE_USAGE,SAMPLE_REVENUE,SAMPLE_HHAGE,SAMPLE_INC,SAMPLE_REGION,SAMPLE_ACCOUNT_ACTIVATION_DATE,SAMPLE_MAILING_ADDRESS,...,highenergyproblem,futurelifeconcern,ccreal,scarce,nuclear,solar,wind,muchasican,futurepower,phev
0,34689,40414551,21156,1763.0,3141.98999,6,7,SOUTH COASTAL,0,502 WHITE OAK DR,...,10.0,9.0,10.0,10.0,10.0,10.0,10.0,7.0,9.0,9.0
1,34653,40418276,3952,329.329987,586.179993,5,6,SOUTH COASTAL,0,10141 BRIAR CIR,...,10.0,10.0,10.0,9.0,1.0,10.0,10.0,9.0,5.0,4.0
2,33837,40409966,19189,1599.079956,2664.22998,6,7,SOUTH CENTRAL,0,530 VISTA OAKS WAY,...,10.0,10.0,2.0,10.0,10.0,8.0,8.0,6.0,8.0,10.0
3,33870,40410026,16647,1387.25,2145.0,5,6,SOUTH CENTRAL,0,2315 AVALON RD,...,9.0,9.0,1.0,6.0,9.0,9.0,7.0,4.0,4.0,5.0
4,33872,30312281,10374,864.5,1359.869995,7,3,SOUTH CENTRAL,0,3513 INDIAN SUMMER TRL,...,,,,,,,,,,


In [34]:
# Build composite scales from survey items that share a construct:
# "greenness", "conserver" and "first adopter".
#
# Each scale is the row-wise sum of its items, computed with vectorized
# column addition rather than one iterrows() pass per scale. As with the
# scalar additions this replaces, a missing (NaN) answer on any item makes
# that respondent's sum NaN. Each result is a plain list with one entry per
# row of `attitudes`, in row order, so later cells can wrap it in pd.Series.

greenness = (
    attitudes["conscientiousrecycle"]
    + attitudes["ccreal"]
    + attitudes["reduceenergy"]
    + attitudes["willingchanges"]
).tolist()

conserver = (
    attitudes["muchasican"]
    + attitudes["realeffort"]
    + attitudes["payattn"]
).tolist()

firstadopter = (
    attitudes["firstone"]
    + attitudes["phev"]
    + attitudes["mycellphone"]
).tolist()


In [35]:
# Turn each summed scale into a per-item mean and store it as a new column on
# `attitudes`: every sum is divided by the number of items that were added
# into it (four for greenness, three for conserver and firstadopter).
for scale_name, item_sums, item_count in (
    ("greenness", greenness, 4),
    ("conserver", conserver, 3),
    ("firstadopter", firstadopter, 3),
):
    attitudes[scale_name] = pd.Series(item_sums) / item_count

# Preview the first 20 rows, which now include the three scale columns.
attitudes.head(20)


Unnamed: 0,zip,SAMPLE_ID,SAMPLE_ENERGY,SAMPLE_USAGE,SAMPLE_REVENUE,SAMPLE_HHAGE,SAMPLE_INC,SAMPLE_REGION,SAMPLE_ACCOUNT_ACTIVATION_DATE,SAMPLE_MAILING_ADDRESS,...,scarce,nuclear,solar,wind,muchasican,futurepower,phev,greenness,conserver,firstadopter
0,34689,40414551,21156,1763.0,3141.98999,6,7,SOUTH COASTAL,0,502 WHITE OAK DR,...,10.0,10.0,10.0,10.0,7.0,9.0,9.0,9.75,7.666667,7.0
1,34653,40418276,3952,329.329987,586.179993,5,6,SOUTH COASTAL,0,10141 BRIAR CIR,...,9.0,1.0,10.0,10.0,9.0,5.0,4.0,,9.0,2.0
2,33837,40409966,19189,1599.079956,2664.22998,6,7,SOUTH CENTRAL,0,530 VISTA OAKS WAY,...,10.0,10.0,8.0,8.0,6.0,8.0,10.0,2.5,8.666667,
3,33870,40410026,16647,1387.25,2145.0,5,6,SOUTH CENTRAL,0,2315 AVALON RD,...,6.0,9.0,9.0,7.0,4.0,4.0,5.0,6.25,7.0,3.666667
4,33872,30312281,10374,864.5,1359.869995,7,3,SOUTH CENTRAL,0,3513 INDIAN SUMMER TRL,...,,,,,,,,,,
5,33844,30311176,1211,100.919998,284.140015,5,6,SOUTH CENTRAL,0,500 MCKAY DR,...,,,,,,,,,,
6,34639,30315451,14462,1205.170044,1875.369995,4,9,SOUTH COASTAL,0,3325 WILLISTON LOOP,...,3.0,5.0,4.0,4.0,8.0,7.0,3.0,3.25,7.333333,4.666667
7,33774,40416196,15384,1282.0,2007.380005,2,6,SOUTH COASTAL,0,11177 HAMMOCK DR,...,7.0,7.0,8.0,8.0,5.0,5.0,5.0,5.75,8.333333,3.0
8,33710,40414231,17398,1449.829956,2327.0,3,6,SOUTH COASTAL,0,5600 36TH AVE N,...,10.0,10.0,10.0,,7.0,2.0,10.0,5.25,8.0,7.333333
9,33761,40405751,6284,523.669983,840.51001,7,3,SOUTH COASTAL,0,APT 105 3055 CASA DEL SOL CIR,...,10.0,10.0,10.0,10.0,7.0,9.0,1.0,9.0,8.333333,


In [36]:
# Diagnose missingness before any rows are removed.
#
# DataFrame.count() tallies only the non-missing entries of each column, so
# despite its name `total_rows` holds one non-missing count per column
# (survey item), not a single row total.
total_rows = attitudes.count(axis=0)
print(total_rows)


zip                               4705
SAMPLE_ID                         4705
SAMPLE_ENERGY                     4705
SAMPLE_USAGE                      4705
SAMPLE_REVENUE                    4705
SAMPLE_HHAGE                      4705
SAMPLE_INC                        4705
SAMPLE_REGION                     4705
SAMPLE_ACCOUNT_ACTIVATION_DATE    4705
SAMPLE_MAILING_ADDRESS            4705
SAMPLE_MAILING_CITY               4705
SAMPLE_MAILING_STATE              4705
SAMPLE_MAILING_ZIP                4705
SAMPLE_PRIMARY_PHONE              4705
SAMPLE_SECONDARY_PHONE            4705
SAMPLE_PREMISE_ADDRESS            4705
SAMPLE_PREMISE_CITY               4705
SAMPLE_PREMISE_STATE              4705
SAMPLE_PREMISE_ZIP                4705
SAMPLE_PREMISE_COUNTY             4705
SAMPLE_MONTHS                     4705
SAMPLE_DWELLING                   4705
SAMPLE_FAMCOMP                    4705
SAMPLE_HHADULTS                   4705
SAMPLE_HHCHILDS                   4705
SAMPLE_HHPEOPLE          

In [38]:
# Keep only the rows that have a value for all three composite scales; a row
# missing any one of them is dropped. dropna() returns a new frame, so
# `attitudes` itself is left unchanged.
scale_columns = ['greenness', 'conserver', 'firstadopter']
attitudes_clean = attitudes.dropna(subset=scale_columns)

# Re-check the per-column non-missing counts on the filtered frame.
total_rows = attitudes_clean.count()
print(total_rows)

zip                               4069
SAMPLE_ID                         4069
SAMPLE_ENERGY                     4069
SAMPLE_USAGE                      4069
SAMPLE_REVENUE                    4069
SAMPLE_HHAGE                      4069
SAMPLE_INC                        4069
SAMPLE_REGION                     4069
SAMPLE_ACCOUNT_ACTIVATION_DATE    4069
SAMPLE_MAILING_ADDRESS            4069
SAMPLE_MAILING_CITY               4069
SAMPLE_MAILING_STATE              4069
SAMPLE_MAILING_ZIP                4069
SAMPLE_PRIMARY_PHONE              4069
SAMPLE_SECONDARY_PHONE            4069
SAMPLE_PREMISE_ADDRESS            4069
SAMPLE_PREMISE_CITY               4069
SAMPLE_PREMISE_STATE              4069
SAMPLE_PREMISE_ZIP                4069
SAMPLE_PREMISE_COUNTY             4069
SAMPLE_MONTHS                     4069
SAMPLE_DWELLING                   4069
SAMPLE_FAMCOMP                    4069
SAMPLE_HHADULTS                   4069
SAMPLE_HHCHILDS                   4069
SAMPLE_HHPEOPLE          

In [39]:
# Save the filtered frame as CSV; index=False leaves the row index out of the file.
attitudes_clean.to_csv(path_or_buf="Data/attitudes_clean.csv", index=False)


In [None]:
# NOTE(review): this whole cell is commented-out draft code and does not run.
# Before re-enabling it, check the following:
#   - `fake` (passed to ax.plot below) is not defined in any visible cell.
#   - scipy.stats.linregress returns more than two values (slope, intercept,
#     rvalue, pvalue, stderr), so unpacking into `(slope, intercept)` would
#     presumably fail -- confirm against the installed scipy version.
#   - x_axis is built from 'highenergyproblem' but labelled "Climate Concern",
#     and y_axis from 'ccreal' but labelled "Energy Supply Concern"; the
#     labels look swapped -- confirm the intended meaning of each column.
#   - x_axis / y_axis are selected from `attitudes` with double brackets, so
#     they are one-column DataFrames that still include rows with missing
#     answers; presumably linregress needs 1-D input without NaN -- verify.
#
# #Exploratory graphs

# x_axis = attitudes[['highenergyproblem']]
# y_axis = attitudes[['ccreal']]

# plt.title("RELATIONSHIP BETWEEN CONCERN ABOUT ENERGY SUPPLY AND CLIMATE CHANGE")
# plt.xlabel("Climate Concern")
# plt.ylabel("Energy Supply Concern")

# (slope, intercept) = linregress(x_axis, y_axis)
# fit = slope * x_axis + intercept

# fig, ax = plt.subplots()

# fig.suptitle("RELATIONSHIP BETWEEN ENERGY SUPPLY CONCERN AND CLIMATE", fontsize=12, fontweight="bold")

# ax.set_xlim(0, 10)
# ax.set_ylim(0, 10)

# ax.set_xlabel("Climate Concern")
# ax.set_ylabel("Energy Supply Concern")

# ax.plot(x_axis, fake, linewidth=0, marker='o')
# ax.plot(x_axis, fit, 'b--')

# plt.show()