# Load Data

In [1]:
# import dependencies
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func

In [2]:
# reflect database table into classes
engine = create_engine("sqlite:///kickstarter_campaigns.sqlite", echo=False)

Base = automap_base()
Base.prepare(engine, reflect=True)
Kickstarter = Base.classes.kickstarter_campaigns

session = Session(engine)

In [3]:
data = engine.execute('SELECT * FROM kickstarter_campaigns')
kickstarter_df = pd.DataFrame()
record_list = []
for record in data:
    record_list.append(record)
    
kickstarter_df = pd.DataFrame(record_list)
kickstarter_df.columns = ['index','backers_count','blurb','country','currency','deadline','fx_rate','goal','id',
                          'name','pledged','slug','state','category_id','category_name','parent_category_id',
                          'creator_profile','creator_name','start_date','campaign_length','state_or_province',
                          'parent_category','blurb_length','blurb_sentiment_compound','blurb_sentiment_positive',
                          'blurb_sentiment_negative','blurb_sentiment_neutral','title_length','title_sentiment_compound',
                          'title_sentiment_positive','title_sentiment_negative','title_sentiment_neutral','usd_goal',
                          'pct_complete']

kickstarter_df = kickstarter_df.set_index('index')
print(f'{len(kickstarter_df)} rows of data loaded')
kickstarter_df.head()

188137 rows of data loaded


Unnamed: 0_level_0,backers_count,blurb,country,currency,deadline,fx_rate,goal,id,name,pledged,...,blurb_sentiment_positive,blurb_sentiment_negative,blurb_sentiment_neutral,title_length,title_sentiment_compound,title_sentiment_positive,title_sentiment_negative,title_sentiment_neutral,usd_goal,pct_complete
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,80,I will be an artist-in-residence at Elsewhere ...,US,USD,2012-04-19 15:16:00,1.0,2800.0,1562040083,Elsewhere Studios artist-in-residency program!,3596.0,...,0.0,0.0,1.0,46,0.0,0.0,0.0,1.0,2800.0,1.284286
2,47,We are looking to bring a Visiting Sculptor fr...,US,USD,2012-04-20 17:06:38,1.0,3900.0,1437561817,Martin Luther King Jr. Sculpture on Campus!,4117.0,...,0.084,0.0,0.916,43,0.0,0.0,0.0,1.0,3900.0,1.055641
3,80,Surrealistic oil paintings capturing the metam...,US,USD,2012-04-16 22:59:00,1.0,750.0,574125813,EMERGENCE: Surreal Oil Paintings by J.J. Long,3125.0,...,0.0,0.0,1.0,45,0.0,0.0,0.0,1.0,750.0,4.166667
4,82,1000 Artists is a public art-making installati...,US,USD,2012-05-07 20:22:25,1.0,4500.0,858990141,1000 Artists: Presidential Inauguration 2013,4586.0,...,0.0,0.0,1.0,44,0.0,0.0,0.0,1.0,4500.0,1.019111
5,31,P.M.A.F.T.W. my upcoming solo show June 2012 a...,US,USD,2012-04-02 21:57:23,1.0,1000.0,566704999,P.M.A.F.T.W.,1036.0,...,0.0,0.0,1.0,12,0.0,0.0,0.0,1.0,1000.0,1.036


# Create ML Models

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

Using TensorFlow backend.


In [5]:
X_numerical = kickstarter_df.drop(['blurb','country','currency','deadline','fx_rate','id','name','slug','state',
                                   'category_id','category_name','parent_category_id','creator_profile',
                                   'creator_name', 'start_date', 'state_or_province','parent_category','backers_count',
                                   'pledged', 'pct_complete', 'goal'
                                  ], axis=1)
              
X_categorical_data = [kickstarter_df['country'], kickstarter_df['currency'],kickstarter_df['category_name'],
                 kickstarter_df['parent_category']]
              
y = kickstarter_df['state']

In [6]:
X_numerical_scaler = StandardScaler().fit(X_numerical)
X_numerical_scaled = X_numerical_scaler.transform(X_numerical)

In [7]:
# FOR SOME REASON THERE WAS A PROBLEM WITH ENCODING THE STATE --- I HAVE REMOVED IT FOR NOW

country_label_encoder = LabelEncoder()
currency_label_encoder = LabelEncoder()
category_label_encoder = LabelEncoder()
parent_category_label_encoder = LabelEncoder()

country_label_encoder.fit(X_categorical_data[0])
currency_label_encoder.fit(X_categorical_data[1])
category_label_encoder.fit(X_categorical_data[2])
parent_category_label_encoder.fit(X_categorical_data[3])

country_encoded = country_label_encoder.transform(X_categorical_data[0])
currency_encoded = currency_label_encoder.transform(X_categorical_data[1])
category_encoded = category_label_encoder.transform(X_categorical_data[2])
parent_category_encoded = parent_category_label_encoder.transform(X_categorical_data[3])

country_categorical = to_categorical(country_encoded)
currency_categorical = to_categorical(currency_encoded)
category_categorical = to_categorical(category_encoded)
parent_category_categorical = to_categorical(parent_category_encoded)

In [8]:
y_label_encoder = LabelEncoder()
y_label_encoder.fit(y)
y_encoded = y_label_encoder.transform(y)
y_categorical = to_categorical(y_encoded)

In [9]:
for i in range(10000, 150000, 9999):
    print(y[i])
    print(y_categorical[i])
    print(X_categorical_data[0][i])
    print(country_categorical[i])
    print(X_categorical_data[1][i])
    print(currency_categorical[i])
    print(X_categorical_data[2][i])
    print(category_categorical[i])
    print(X_categorical_data[3][i])
    print(parent_category_categorical[i])
    print('---------------------------------')

successful
[ 0.  0.  1.]
US
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  1.]
USD
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
Comics
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
none
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
---------------------------------
successful
[ 0.  0.  1.]
US
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 

In [12]:
np.insert(X_numerical_scaled, 1, country_categorical, axis=0)

ValueError: could not broadcast input array from shape (188137,22) into shape (188137,12)