In [51]:
import pandas as pd
from matplotlib import pyplot as plt

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, explained_variance_score, max_error

from keras.models import Sequential
from keras.models import model_from_json
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import Adam
from keras.callbacks import CSVLogger

In [41]:
# Load the raw Kickstarter training set; the path is resolved relative to
# the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="KS_train_data.csv")

In [42]:
# Peek at the first five rows of the freshly loaded frame (five is head()'s default).
df.head()

Unnamed: 0,project_id,backers_count,blurb,category,converted_pledged_amount,country,created_at,currency,deadline,fx_rate,...,launched_at,name,pledged,staff_pick,usd_pledged,location,funded,subcategory,project_url,reward_url
0,KS_000000,80,I will be an artist-in-residence at Elsewhere ...,art,3596,US,1325980505,USD,1334866560,1.0,...,1332969260,Elsewhere Studios artist-in-residency program!,3596.0,False,3596.0,"Paonia, CO",True,mixed media,https://www.kickstarter.com/projects/hilaryeme...,https://www.kickstarter.com/projects/hilaryeme...
1,KS_000001,82,1000 Artists is a public art-making installati...,art,4586,US,1330926084,USD,1336440145,1.0,...,1332984145,1000 Artists: Presidential Inauguration 2013,4586.0,False,4586.0,"Washington, DC",True,public art,https://www.kickstarter.com/projects/17146650/...,https://www.kickstarter.com/projects/17146650/...
2,KS_000002,21,"The Sequel to ‘My Favorite Machine”, ""MyPhoneH...",art,5217,US,1332382894,USD,1338175739,1.0,...,1332991739,MyPhoneHenge,5217.0,False,5217.0,"Frisco, TX",True,sculpture,https://www.kickstarter.com/projects/belveal/m...,https://www.kickstarter.com/projects/belveal/m...
3,KS_000003,37,A film exploring the role and value of art edu...,art,7160,GB,1332342451,USD,1334806096,1.0,...,1332991696,Walk-Through,7160.0,False,7160.0,"Glasgow, UK",True,art,https://www.kickstarter.com/projects/107813091...,https://www.kickstarter.com/projects/107813091...
4,KS_000004,153,We need to build a kitchen for Habitable Space...,art,15445,US,1328562617,USD,1335584240,1.0,...,1332992240,A kitchen for Habitable Spaces,15445.0,False,15445.0,"Kingsbury, TX",True,public art,https://www.kickstarter.com/projects/104409276...,https://www.kickstarter.com/projects/104409276...


In [43]:
# Remove the project_id column.
df = df.drop(columns=['project_id'])

In [44]:
# Keep a single row per project_url: the first occurrence is retained
# (drop_duplicates' default) and later repeats are discarded.
df = df.drop_duplicates(subset=['project_url'])

In [45]:
# Remove the backer/pledge columns, the project name and both URL columns.
# NOTE(review): the backer/pledge columns presumably leak the campaign
# outcome into the features -- confirm that is the reason they are dropped.
df = df.drop(
    columns=[
        'backers_count',
        'converted_pledged_amount',
        'name',
        'pledged',
        'usd_pledged',
        'project_url',
        'reward_url',
    ]
)

In [46]:
# Show the first five rows to confirm which columns remain after the drop.
df.head()

Unnamed: 0,blurb,category,country,created_at,currency,deadline,fx_rate,goal,launched_at,staff_pick,location,funded,subcategory
0,I will be an artist-in-residence at Elsewhere ...,art,US,1325980505,USD,1334866560,1.0,2800.0,1332969260,False,"Paonia, CO",True,mixed media
1,1000 Artists is a public art-making installati...,art,US,1330926084,USD,1336440145,1.0,4500.0,1332984145,False,"Washington, DC",True,public art
2,"The Sequel to ‘My Favorite Machine”, ""MyPhoneH...",art,US,1332382894,USD,1338175739,1.0,5000.0,1332991739,False,"Frisco, TX",True,sculpture
3,A film exploring the role and value of art edu...,art,GB,1332342451,USD,1334806096,1.0,6500.0,1332991696,False,"Glasgow, UK",True,art
4,We need to build a kitchen for Habitable Space...,art,US,1328562617,USD,1335584240,1.0,15000.0,1332992240,False,"Kingsbury, TX",True,public art


In [47]:
# Interpret the three timestamp columns as seconds since the Unix epoch and
# replace them with datetime values (column order is unchanged because
# assign overwrites existing columns in place).
df = df.assign(
    created_at=lambda frame: pd.to_datetime(frame['created_at'], unit='s'),
    deadline=lambda frame: pd.to_datetime(frame['deadline'], unit='s'),
    launched_at=lambda frame: pd.to_datetime(frame['launched_at'], unit='s'),
)

In [48]:
# First five rows again, now with the timestamp columns rendered as datetimes.
df.head()

Unnamed: 0,blurb,category,country,created_at,currency,deadline,fx_rate,goal,launched_at,staff_pick,location,funded,subcategory
0,I will be an artist-in-residence at Elsewhere ...,art,US,2012-01-07 23:55:05,USD,2012-04-19 20:16:00,1.0,2800.0,2012-03-28 21:14:20,False,"Paonia, CO",True,mixed media
1,1000 Artists is a public art-making installati...,art,US,2012-03-05 05:41:24,USD,2012-05-08 01:22:25,1.0,4500.0,2012-03-29 01:22:25,False,"Washington, DC",True,public art
2,"The Sequel to ‘My Favorite Machine”, ""MyPhoneH...",art,US,2012-03-22 02:21:34,USD,2012-05-28 03:28:59,1.0,5000.0,2012-03-29 03:28:59,False,"Frisco, TX",True,sculpture
3,A film exploring the role and value of art edu...,art,GB,2012-03-21 15:07:31,USD,2012-04-19 03:28:16,1.0,6500.0,2012-03-29 03:28:16,False,"Glasgow, UK",True,art
4,We need to build a kitchen for Habitable Space...,art,US,2012-02-06 21:10:17,USD,2012-04-28 03:37:20,1.0,15000.0,2012-03-29 03:37:20,False,"Kingsbury, TX",True,public art


In [49]:
# Truncate each timestamp to midnight so that only the calendar date is kept;
# the day differences computed from these columns are then whole-date gaps.
df = df.assign(
    created_at=df['created_at'].dt.normalize(),
    deadline=df['deadline'].dt.normalize(),
    launched_at=df['launched_at'].dt.normalize(),
)

In [50]:
# First five rows: the date columns should now carry no time-of-day component.
df.head()

Unnamed: 0,blurb,category,country,created_at,currency,deadline,fx_rate,goal,launched_at,staff_pick,location,funded,subcategory
0,I will be an artist-in-residence at Elsewhere ...,art,US,2012-01-07,USD,2012-04-19,1.0,2800.0,2012-03-28,False,"Paonia, CO",True,mixed media
1,1000 Artists is a public art-making installati...,art,US,2012-03-05,USD,2012-05-08,1.0,4500.0,2012-03-29,False,"Washington, DC",True,public art
2,"The Sequel to ‘My Favorite Machine”, ""MyPhoneH...",art,US,2012-03-22,USD,2012-05-28,1.0,5000.0,2012-03-29,False,"Frisco, TX",True,sculpture
3,A film exploring the role and value of art edu...,art,GB,2012-03-21,USD,2012-04-19,1.0,6500.0,2012-03-29,False,"Glasgow, UK",True,art
4,We need to build a kitchen for Habitable Space...,art,US,2012-02-06,USD,2012-04-28,1.0,15000.0,2012-03-29,False,"Kingsbury, TX",True,public art


In [51]:
# Derive two integer features, appended in this order:
#   days_to_launch  -- days between creation and launch
#   days_to_dealine -- days between launch and deadline
# NOTE: 'days_to_dealine' is misspelled, but the name is kept unchanged
# because it is the label this frame carries forward.
df = df.assign(
    days_to_launch=df['launched_at'].sub(df['created_at']).dt.days,
    days_to_dealine=df['deadline'].sub(df['launched_at']).dt.days,
)

In [52]:
# First five rows, including the two newly derived day-count columns.
df.head()

Unnamed: 0,blurb,category,country,created_at,currency,deadline,fx_rate,goal,launched_at,staff_pick,location,funded,subcategory,days_to_launch,days_to_dealine
0,I will be an artist-in-residence at Elsewhere ...,art,US,2012-01-07,USD,2012-04-19,1.0,2800.0,2012-03-28,False,"Paonia, CO",True,mixed media,81,22
1,1000 Artists is a public art-making installati...,art,US,2012-03-05,USD,2012-05-08,1.0,4500.0,2012-03-29,False,"Washington, DC",True,public art,24,40
2,"The Sequel to ‘My Favorite Machine”, ""MyPhoneH...",art,US,2012-03-22,USD,2012-05-28,1.0,5000.0,2012-03-29,False,"Frisco, TX",True,sculpture,7,60
3,A film exploring the role and value of art edu...,art,GB,2012-03-21,USD,2012-04-19,1.0,6500.0,2012-03-29,False,"Glasgow, UK",True,art,8,21
4,We need to build a kitchen for Habitable Space...,art,US,2012-02-06,USD,2012-04-28,1.0,15000.0,2012-03-29,False,"Kingsbury, TX",True,public art,52,30


In [53]:
# Remove the three date columns themselves.
df = df.drop(columns=['created_at', 'deadline', 'launched_at'])

In [54]:
# First five rows of the cleaned frame.
df.head()

Unnamed: 0,blurb,category,country,currency,fx_rate,goal,staff_pick,location,funded,subcategory,days_to_launch,days_to_dealine
0,I will be an artist-in-residence at Elsewhere ...,art,US,USD,1.0,2800.0,False,"Paonia, CO",True,mixed media,81,22
1,1000 Artists is a public art-making installati...,art,US,USD,1.0,4500.0,False,"Washington, DC",True,public art,24,40
2,"The Sequel to ‘My Favorite Machine”, ""MyPhoneH...",art,US,USD,1.0,5000.0,False,"Frisco, TX",True,sculpture,7,60
3,A film exploring the role and value of art edu...,art,GB,USD,1.0,6500.0,False,"Glasgow, UK",True,art,8,21
4,We need to build a kitchen for Habitable Space...,art,US,USD,1.0,15000.0,False,"Kingsbury, TX",True,public art,52,30


In [56]:
# Persist the cleaned frame next to the notebook. The row index is written as
# the first, unnamed CSV column because to_csv defaults to index=True.
df.to_csv(path_or_buf="KS_train_data_clean_1.0.csv")