In [1]:
import io, os, sys, types, subprocess, collections

# Import Pandas
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Import Plotly and Cufflinks
# Plotly username and API key should be set in environment variables
import plotly
plotly.tools.set_credentials_file(username=os.environ['PLOTLY_USERNAME'], api_key=os.environ['PLOTLY_KEY'])
import plotly.graph_objs as go
import cufflinks as cf

# Import numpy
import numpy as np

# Import SciKit decision tree
from sklearn.tree import DecisionTreeClassifier, export_graphviz

# Import Scikit cross-validation function
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import GridSearchCV

In [2]:
# Import module to read in secure data.
# The directory has to be appended to sys.path before the import statement so
# that the project-local retrieve_data module can be found; the path is
# relative, so it depends on the notebook's working directory.
sys.path.append('../data/NREL')
import retrieve_data as rd

In [3]:
# Load the dataset through the project-local retrieve_data module.
# NOTE(review): the return type is not visible here; later cells index the
# result like a pandas DataFrame (solar['...'], solar.loc[...]).
solar = rd.retrieve_dirks_sheet()

In [4]:
# Make the relative 'utils' directory importable, then import the
# project-local process_data module from it (path append must come first).
sys.path.append('utils')
import process_data as prd

In [5]:
# Clean the loaded data. The return value is not assigned, so this presumably
# mutates `solar` in place - TODO confirm against process_data.clean_data.
prd.clean_data(solar)

In [11]:
# Print the category frequencies of each candidate feature column.
# 'Precipitation' used to be printed a second time at the end of this cell
# (copy-paste duplicate); each column is now reported exactly once.
for column in ('Precipitation', 'Climate3', 'Mounting'):
    print(solar[column].value_counts())

summer dry      4303
desert          3321
fully humid     2373
winter dry       358
steppe           168
Semi-arid         33
monsoonal         15
polar tundra      13
Name: Precipitation, dtype: int64
Moderate       6122
Desert         3376
Hot & Humid    1143
Snow            388
Name: Climate3, dtype: int64
rack              5559
1-axis tracker    3261
roof rack          865
roof               330
single-axis         40
façade/BIPV         15
2-axis tracker       7
Name: Mounting, dtype: int64


In [28]:
# Climates chosen for fitting the decision trees
climates = [
    'Moderate',
    'Desert',
    'Hot & Humid',
    'Snow',
]

# Mountings chosen for fitting the decision trees
mountings = [
    'rack',
    '1-axis tracker',
    'roof rack',
    'roof',
]

# Causes chosen as targets for the generated decision trees
causes = [
    'Hot spots',
    'Encapsulant discoloration',
    'Major delamination',
    'Internal circuitry discoloration',
    'Fractured cells',
    'Glass breakage',
    'Permanent soiling',
    'Diode/J-box problem',
]

In [29]:
def contains_mode(l, mode):
    """
    Checks if the element mode exists in l; if l is a missing value, return 0

    Args:
        l (list or NaN): Reported degradation modes for one record, or a
            missing-value marker (NaN / None) when nothing was reported
        mode (string): Degradation mode to find in the list
    Returns:
        integer: 0 if l is missing OR does not contain mode, 1 if l contains mode

    Note:
        A plain string also counts as a sequence, so for a string l the
        membership test is a substring match.
    """
    # collections.Sequence was a deprecated alias that Python 3.10 removed;
    # the abstract base class lives in collections.abc.
    from collections.abc import Sequence

    # Only non-sequences are passed to pd.isnull: on a list it returns an
    # element-wise array, whose truth value would be ambiguous.
    if not isinstance(l, Sequence) and pd.isnull(l):
        return 0
    return 1 if mode in l else 0

### Naive Bayes - Setup data and build model

In [12]:
# Select all rows of the two categorical feature columns plus the cleaned
# cause column.
# NOTE(review): naive_df_test is not referenced by any other cell visible in
# this section - confirm it is still needed.
naive_df_test = solar.loc[:, ['Climate3', 'Mounting', 'Cause (Cleaned)']]

In [31]:
# Assemble the Naive Bayes frame left to right: each categorical feature is
# followed by its one-hot indicator columns, and the cleaned cause column
# comes last.
naive_df = (
    pd.DataFrame(solar.loc[:, 'Mounting'])
    .join(pd.get_dummies(solar['Mounting']))
    .join(solar.loc[:, 'Climate3'])
    .join(pd.get_dummies(solar['Climate3']))
    .join(solar.loc[:, 'Cause (Cleaned)'])
)

# For every selected cause (defined above), add a binary 1/0 column directly
# after 'Cause (Cleaned)'. Each new column takes that same slot, so the
# indicators end up in the reverse of their order in `causes`.
for cause in causes:
    indicator_slot = naive_df.columns.get_loc('Cause (Cleaned)') + 1
    indicator = naive_df['Cause (Cleaned)'].apply(lambda modes: contains_mode(modes, cause))
    naive_df.insert(indicator_slot, cause, indicator)

naive_df

Unnamed: 0,Mounting,1-axis tracker,2-axis tracker,façade/BIPV,rack,roof,roof rack,single-axis,Climate3,Desert,...,Snow,Cause (Cleaned),Diode/J-box problem,Permanent soiling,Glass breakage,Fractured cells,Internal circuitry discoloration,Major delamination,Encapsulant discoloration,Hot spots
0,rack,0,0,0,1,0,0,0,Moderate,0,...,0,,0,0,0,0,0,0,0,0
1,rack,0,0,0,1,0,0,0,Moderate,0,...,0,,0,0,0,0,0,0,0,0
2,,0,0,0,0,0,0,0,Hot & Humid,0,...,0,,0,0,0,0,0,0,0,0
3,,0,0,0,0,0,0,0,Hot & Humid,0,...,0,,0,0,0,0,0,0,0,0
4,,0,0,0,0,0,0,0,Hot & Humid,0,...,0,,0,0,0,0,0,0,0,0
5,rack,0,0,0,1,0,0,0,Moderate,0,...,0,,0,0,0,0,0,0,0,0
6,,0,0,0,0,0,0,0,Moderate,0,...,0,,0,0,0,0,0,0,0,0
7,,0,0,0,0,0,0,0,Snow,0,...,1,,0,0,0,0,0,0,0,0
8,,0,0,0,0,0,0,0,Snow,0,...,1,,0,0,0,0,0,0,0,0
9,,0,0,0,0,0,0,0,Snow,0,...,1,,0,0,0,0,0,0,0,0
