In [1]:
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy import create_engine
from flask_sqlalchemy import SQLAlchemy

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report

# Model-selection utilities. `sklearn.cross_validation` was deprecated in
# scikit-learn 0.18 and removed in 0.20; the same helpers are provided by
# `sklearn.model_selection`, which this cell already imports from.
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import warnings
import os
from urllib.parse import quote
%matplotlib inline

warnings.filterwarnings("ignore") #, category=DeprecationWarning)



In [2]:
# Environment variables are set in the tensorflow1.4 environment, per
# https://conda.io/docs/user-guide/tasks/manage-environments.html#saving-environment-variables
# and https://vsupalov.com/flask-sqlalchemy-postgres/

def get_env_variable(name):
    """Return the value of the environment variable ``name``.

    Raises:
        Exception: if ``name`` is not present in ``os.environ``; the message
            names the missing variable.
    """
    try:
        value = os.environ[name]
    except KeyError:
        # Replace the bare KeyError with a message that says what is missing.
        raise Exception("Expected environment variable '{}' not set.".format(name))
    return value

# Connection settings are read from the environment; their values depend on
# your setup. Lookups happen in the order URL, USER, PW, DB.
POSTGRES_URL, POSTGRES_USER, POSTGRES_PW, POSTGRES_DB = (
    get_env_variable(key)
    for key in ("POSTGRES_URL", "POSTGRES_USER", "POSTGRES_PW", "POSTGRES_DB")
)

In [3]:
# Build the SQLAlchemy connection URL for the psycopg2 driver.
# The user name and password are percent-encoded first: characters such as
# "@", ":", "/" or "%" would otherwise be read as URL delimiters/escapes and
# the URL would be parsed into the wrong credentials or host.
# NOTE(review): assumes POSTGRES_USER / POSTGRES_PW hold the raw values (not
# already percent-encoded) — confirm against how the environment is set up.
DB_URL = 'postgresql+psycopg2://{user}:{pw}@{url}/{db}'.format(
    user=quote(POSTGRES_USER, safe=''),
    pw=quote(POSTGRES_PW, safe=''),
    url=POSTGRES_URL,
    db=POSTGRES_DB,
)

In [4]:
# Keep the connection URL under `engine_var`, then create the SQLAlchemy
# engine that the query cells pass to pandas.
engine_var = DB_URL
engine = sqlalchemy.create_engine(engine_var)

In [5]:
# df = pd.read_pickle('data/kickstarter_data.pkl')
# TODO: determine why ID shows up but is not called - index issue?
# Load the selected kickstarter_data columns into `df`.
df = pd.read_sql_query(
    sql='''SELECT main_category, main_category_code, currency, currency_code, 
                        deadline, usd_goal_real, usd_pledged_real, launched, state, state_code, 
                        country,country_code FROM kickstarter_data''',
    con=engine,
)
# Show the first five full rows of the table as this cell's output.
pd.read_sql_query(
    sql='''SELECT * FROM kickstarter_data LIMIT 5''',
    con=engine,
)

Unnamed: 0,idx,ID,name,category,main_category,currency,deadline,goal,launched,pledged,...,country,usd pledged,usd_pledged_real,usd_goal_real,campaign_length,pct_goal_achieved,main_category_code,country_code,currency_code,state_code
0,1031685482,1031685482,You Sparkle Inside: A Kids' Book + Commissions...,Children's Books,Publishing,USD,2018-01-02,2000.0,2017-12-06,6083.0,...,US,1054.0,6083.0,2000.0,27,304.15,12,21,13,1
1,1176736360,1176736360,THE NEW BILLY WYLDER RECORD,Indie Rock,Music,USD,2018-01-02,10000.0,2017-11-30,11169.56,...,US,650.0,11169.56,10000.0,33,111.6956,10,21,13,1
2,1894703097,1894703097,Driftwood Holly ***The Venice Project***,Indie Rock,Music,EUR,2018-01-02,25000.0,2017-11-28,25417.2,...,DE,0.0,30615.02,30112.5,35,101.668809,10,5,4,1
3,311863059,311863059,On a Limb's debut album,Jazz,Music,USD,2018-01-02,1000.0,2017-12-09,1743.0,...,US,0.0,1743.0,1000.0,24,174.3,10,21,13,1
4,2076121120,2076121120,Briggs Barbecue Sauces,Small Batch,Food,USD,2018-01-02,200000.0,2017-11-03,1.0,...,US,0.0,1.0,200000.0,60,0.0005,7,21,13,0


In [6]:
# Table-wide totals: count of non-null names plus the summed pledged and goal columns.
pd.read_sql_query(
    sql='''SELECT count(name), SUM(usd_pledged_real) as USD_Pledged,SUM(usd_goal_real) as USD_Goal FROM kickstarter_data''',
    con=engine,
)

Unnamed: 0,count,usd_pledged,usd_goal
0,19291,262772700.0,874347900.0


In [7]:
# Per-state row count and goal/pledged sums, largest pledged sum first (at most 10 rows).
pd.read_sql_query(
    sql='''SELECT state,COUNT(state) as count, SUM(usd_goal_real) as usd_goal, SUM(usd_pledged_real) as usd_pledged FROM kickstarter_data GROUP BY state ORDER BY usd_pledged DESC LIMIT 10''',
    con=engine,
)

Unnamed: 0,state,count,usd_goal,usd_pledged
0,successful,8368,81238880.0,249382900.0
1,failed,10923,793109000.0,13389830.0


In [8]:
# Top five main categories by summed usd_pledged_real, with row count and summed goal.
pd.read_sql_query(
    sql='''SELECT main_category,COUNT(main_category) as count, SUM(usd_goal_real) as usd_goal,SUM(usd_pledged_real) as usd_pledged FROM kickstarter_data Group By main_category ORDER BY usd_pledged DESC LIMIT 5''',
    con=engine,
)

Unnamed: 0,main_category,count,usd_goal,usd_pledged
0,Games,2541,48148460.0,70608172.84
1,Design,2127,52197320.0,66428025.84
2,Technology,2081,106528000.0,56029162.8
3,Film & Video,1943,396225000.0,13787683.4
4,Publishing,2005,22718820.0,10911971.51


In [9]:
# Top five categories by summed usd_pledged_real, with row count and summed goal.
pd.read_sql_query(
    sql='''SELECT category,COUNT(category) as count, SUM(usd_goal_real) as usd_goal,SUM(usd_pledged_real) as usd_pledged FROM kickstarter_data Group By category ORDER BY usd_pledged DESC LIMIT 5''',
    con=engine,
)

Unnamed: 0,category,count,usd_goal,usd_pledged
0,Tabletop Games,1281,15384499.07,59007489.07
1,Product Design,1675,34575051.56,54426113.02
2,Hardware,175,10072078.6,11177130.2
3,Design,238,4752523.21,9818868.51
4,Wearables,98,4006337.79,8721065.16


In [10]:
# Top five countries by summed usd_pledged_real, with row count and summed goal.
pd.read_sql_query(
    sql='''SELECT country, COUNT(country) as count,SUM(usd_goal_real) as usd_goal,SUM(usd_pledged_real)  as usd_pledged FROM kickstarter_data GROUP BY country ORDER BY usd_pledged DESC LIMIT 5''',
    con=engine,
)

Unnamed: 0,country,count,usd_goal,usd_pledged
0,US,12204,672317400.0,187266300.0
1,GB,2207,46084010.0,18719780.0
2,CA,962,31229990.0,7231933.0
3,DE,519,18477280.0,6270310.0
4,NL,216,4211652.0,5824917.0


In [11]:
# Top five currencies by summed usd_pledged_real, with row count and summed goal.
pd.read_sql_query(
    sql='''SELECT currency,COUNT(currency) as count,SUM(usd_goal_real) as usd_goal,SUM(usd_pledged_real) as usd_pledged FROM kickstarter_data GROUP BY currency ORDER BY usd_pledged DESC LIMIT 5''',
    con=engine,
)

Unnamed: 0,currency,count,usd_goal,usd_pledged
0,USD,12204,672317400.0,187266300.0
1,EUR,1941,73154340.0,27642810.0
2,GBP,2207,46084010.0,18719780.0
3,CAD,962,31229990.0,7231933.0
4,AUD,581,14196350.0,5774700.0
