In [None]:
!pip install vaderSentiment

import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

Collecting vaderSentiment
[?25l  Downloading https://files.pythonhosted.org/packages/76/fc/310e16254683c1ed35eeb97386986d6c00bc29df17ce280aed64d55537e9/vaderSentiment-3.3.2-py2.py3-none-any.whl (125kB)
[K     |██▋                             | 10kB 15.3MB/s eta 0:00:01[K     |█████▏                          | 20kB 3.3MB/s eta 0:00:01[K     |███████▉                        | 30kB 3.8MB/s eta 0:00:01[K     |██████████▍                     | 40kB 4.5MB/s eta 0:00:01[K     |█████████████                   | 51kB 3.7MB/s eta 0:00:01[K     |███████████████▋                | 61kB 4.1MB/s eta 0:00:01[K     |██████████████████▏             | 71kB 4.5MB/s eta 0:00:01[K     |████████████████████▉           | 81kB 4.8MB/s eta 0:00:01[K     |███████████████████████▍        | 92kB 5.3MB/s eta 0:00:01[K     |██████████████████████████      | 102kB 5.1MB/s eta 0:00:01[K     |████████████████████████████▋   | 112kB 5.1MB/s eta 0:00:01[K     |███████████████████████████████▏| 12

# Hard-code the input dictionary

In [None]:
# Example campaign submission used to exercise the functions below.
# NOTE(review): this name shadows the built-in input(); it is kept because
# later cells refer to the dictionary as `input`.
input = dict(
    name='terrible MegaBuster from Megaman X',
    goal=10000,
    launched='2015-08-11',
    deadline='2015-08-18',
    backers=21,
    main_category=11,
    username='LoginID',
)

In [None]:
# Echo the campaign title stored in the dictionary.
input['name']

'Super MegaBuster from Megaman X'

# Make a function that takes the input dict and converts it to a DataFrame

In [None]:
def framemaker(web_in):
  """Convert one campaign's raw fields into a single-row feature frame.

  Parameters
  ----------
  web_in : dict
    Scalar values keyed by at least 'name', 'goal', 'launched', 'deadline',
    'backers' and 'main_category'. Any other key (for example 'username')
    is ignored.

  Returns
  -------
  pandas.DataFrame
    One row with the columns 'goal', 'backers', 'length_of_campaign',
    'sentiments' and 'main_category', in that order.

  Raises
  ------
  KeyError
    If one of the required keys is missing from ``web_in``.
  """
  # index=[0] lets pandas build a one-row frame out of scalar values.
  input_frame = pd.DataFrame(web_in, index=[0])

  # Parse both dates so the campaign length can be expressed in whole days.
  input_frame['deadline'] = pd.to_datetime(input_frame['deadline'])
  input_frame['launched'] = pd.to_datetime(input_frame['launched'])
  input_frame['length_of_campaign'] = (input_frame['deadline'] - input_frame['launched']).dt.days

  # Score the title with VADER, a lexicon- and rule-based sentiment analyzer
  # (not a neural network), and keep its 'compound' polarity score.
  analyzer = SentimentIntensityAnalyzer()
  input_frame['sentiments'] = [
      analyzer.polarity_scores(title)['compound'] for title in input_frame['name']
  ]

  # Selecting the feature columns already discards 'username', 'name',
  # 'launched' and 'deadline'; dropping them one by one first would only add
  # a KeyError when an optional key such as 'username' is not supplied.
  return input_frame[['goal', 'backers', 'length_of_campaign', 'sentiments', 'main_category']]

In [None]:
# Build the one-row feature frame for the hard-coded campaign.
user_input = framemaker(input)

In [None]:
# Display the engineered feature row.
user_input

Unnamed: 0,goal,backers,length_of_campaign,sentiments,main_category
0,10000,21,7,-0.4767,11


# Make a function that takes in the DataFrame, uses the model, and makes a prediction

In [None]:
!pip install category_encoders==2.*
from sklearn.model_selection import train_test_split
import category_encoders as ce
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from category_encoders import OneHotEncoder, OrdinalEncoder




In [None]:
# Load the cleaned Kickstarter data from a path relative to the working
# directory; success_predictor reads it through the global `df`.
df = pd.read_csv('cleaned_kickstarter_data.csv')

# Print the (rows, columns) shape, then render the frame as the cell output.
print(df.shape)
df

(999, 9)


Unnamed: 0,name,main_category,deadline,launched,goal,backers,length_of_campaign,project_success,sentiments
0,The Songs of Adelaide & Abullah,1,2015-10-09 11:36:00,2015-08-11 12:12:28,1000.0,0,58,0,0.0000
1,Where is Hank?,2,2013-02-26 00:20:50,2013-01-12 00:20:50,45000.0,3,45,0,0.0000
2,ToshiCapital Rekordz Needs Help to Complete Album,3,2012-04-16 04:24:11,2012-03-17 03:24:11,5000.0,1,30,0,0.4019
3,Community Film Project: The Art of Neighborhoo...,2,2015-08-29 01:00:00,2015-07-04 08:35:03,19500.0,14,55,0,0.0000
4,Monarch Espresso Bar,4,2016-04-01 13:38:27,2016-02-26 13:38:27,50000.0,224,35,1,0.0000
...,...,...,...,...,...,...,...,...,...
994,The 1st Motion Tracking DIY Smart Home Securit...,13,2016-12-31 03:54:32,2016-11-01 02:54:32,25000.0,397,60,1,0.6249
995,Veterans,12,2012-08-15 06:00:00,2012-07-09 05:39:06,5000.0,87,37,1,0.0000
996,MY VERY FIRST KICKSTARTER,2,2014-08-12 20:08:35,2014-07-13 20:08:35,6500.0,2,30,0,0.0000
997,This Song Is About You,3,2014-07-11 23:35:00,2014-06-11 23:35:00,40000.0,0,30,0,0.0000


In [None]:
def success_predictor(user_input):
  """Fit a logistic-regression pipeline on ``df`` and classify ``user_input``.

  The model is refit on every call using the module-level ``df``. Features
  are every numeric column of ``df`` except the target, plus any non-numeric
  column with at most 50 distinct values in the training split.

  Parameters
  ----------
  user_input : pandas.DataFrame
    Frame containing every feature column the model is trained on. Column
    order does not matter and extra columns are ignored. Only the first row
    is classified.

  Returns
  -------
  str
    'Your Kickstarter project is likely to succeed!' when the predicted
    class is 1, otherwise 'Your Kickstarter project is likely to fail.'

  Raises
  ------
  ValueError
    If ``user_input`` lacks one or more of the feature columns.
  """
  target = 'project_success'

  # Only the training part is used; the split is kept (same sizes, same seed)
  # so the model is fit on the same 80% of rows as before.
  train, _ = train_test_split(df, train_size=0.80, test_size=0.20,
                              stratify=df[target], random_state=42)

  # Candidate features: everything except the target.
  train_features = train.drop(columns=[target])

  # Numeric columns are used as they are.
  numeric_features = train_features.select_dtypes(include='number').columns.tolist()

  # Non-numeric columns are kept only when their cardinality is low enough
  # to one-hot encode.
  cardinality = train_features.select_dtypes(exclude='number').nunique()
  categorical_features = cardinality[cardinality <= 50].index.tolist()

  features = numeric_features + categorical_features

  # Fail early with a readable message instead of an error from deep inside
  # the pipeline.
  missing = [col for col in features if col not in user_input.columns]
  if missing:
    raise ValueError('user_input is missing feature columns: {}'.format(missing))

  lrmodel = Pipeline([
                  ('ohe', OneHotEncoder(use_cat_names=True)),
                  ('scaler', StandardScaler()),
                  ('impute', SimpleImputer()),
                  ('classifier', LogisticRegressionCV())
                  ])
  lrmodel.fit(train[features], train[target])

  # Reorder the input to the training column order. framemaker emits
  # 'main_category' last while the training frame has it first; depending on
  # the scikit-learn version, a differently ordered frame is either rejected
  # or read positionally, which would score the wrong columns.
  prediction = lrmodel.predict(user_input[features])[0]

  if prediction == 1:
    return 'Your Kickstarter project is likely to succeed!'
  else:
    return 'Your Kickstarter project is likely to fail.'

In [None]:
# Classify the hard-coded campaign; the model is fit inside this call.
success_predictor(user_input)

'Your Kickstarter project is likely to fail.'

In [None]:
# print(X_train)
  # print('training accuracy:', lrmodel.score(X_train, y_train))
  # print('test accuracy:', lrmodel.score(X_test, y_test))