In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ast
import datetime
%matplotlib inline

In [None]:
def clean_df(df_raw, scrape_date=None):
    """Return a cleaned copy of the raw Kickstarter frame.

    The input frame is not modified. The following steps are applied:

    * ``main_category`` is taken from the first ``/``-separated segment of
      the ``slug`` entry in the dict literal stored in ``category``.
    * ``successful`` is 1 (int8) when ``goal <= converted_pledged_amount``,
      otherwise 0; both source columns are then dropped.
    * ``launched_at``, ``deadline`` and ``state_changed_at`` are parsed as
      Unix timestamps expressed in seconds.
    * Rows whose deadline is not strictly before ``scrape_date`` are removed.
    * ``deadline`` is replaced by the whole number of days between launch
      and deadline.
    * A fixed list of columns is dropped, ``currency`` is one-hot encoded
      (first level dropped), and ``name`` is moved to the first position.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Raw frame; must contain every column referenced above, including all
        of the columns listed in ``drop_cols``.
    scrape_date : datetime.datetime, optional
        Cut-off for the deadline filter. Defaults to 2020-09-17.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.

    Raises
    ------
    KeyError
        If an expected column is missing from ``df_raw``.
    """
    if scrape_date is None:
        scrape_date = datetime.datetime(2020, 9, 17)

    df = df_raw.copy()

    # `category` holds the string form of a dict; its slug looks like
    # "parent/child", and only the parent part is kept.
    df['main_category'] = df['category'].apply(
        lambda x: ast.literal_eval(x)['slug'].split('/')[0]
    )

    df['successful'] = (df['goal'] <= df['converted_pledged_amount']).astype(np.int8)
    df = df.drop(columns=['goal', 'converted_pledged_amount'])

    # The epoch values are in seconds. Without unit='s' pandas reads integers
    # as nanoseconds, which puts every date in 1970 and makes every campaign
    # length come out as 0 days.
    unix_cols = ['launched_at', 'deadline', 'state_changed_at']
    for col in unix_cols:
        df[col] = pd.to_datetime(df[col], unit='s', origin='unix')

    # .copy() so the assignment below writes to an independent frame rather
    # than to a slice of the unfiltered one.
    df = df[df['deadline'] < scrape_date].copy()
    df['deadline'] = (df['deadline'] - df['launched_at']).dt.days

    drop_cols = ['blurb', 'country', 'creator', 'currency_symbol', 'current_currency',
                 'country_displayable_name', 'location', 'photo',
                 'profile', 'slug', 'source_url', 'urls', 'is_backing', 'is_starred',
                 'static_usd_rate', 'usd_pledged', 'usd_type', 'category', 'friends',
                 'permissions', 'created_at', 'currency_trailing_code']
    to_one_hot_cols = ['currency']

    df = df.drop(columns=drop_cols)

    # Explicit integer dtype keeps the indicator columns 0/1 on every pandas
    # version (the default became bool in pandas 2.0).
    df = pd.get_dummies(df, columns=to_one_hot_cols, drop_first=True, dtype=np.uint8)

    # Move `name` to the first column.
    df.insert(0, 'name', df.pop('name'))

    return df

In [None]:
# Read Kickstarter.csv from the working directory; `data` keeps the raw frame.
data = pd.read_csv('Kickstarter.csv')
# clean_df works on a copy, so `data` is left unchanged and `df` is the cleaned frame.
df = clean_df(data)
# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,name,backers_count,deadline,disable_communication,fx_rate,id,is_starrable,launched_at,pledged,spotlight,staff_pick,state,state_changed_at,main_category,successful,currency_CAD,currency_CHF,currency_DKK,currency_EUR,currency_GBP,currency_HKD,currency_JPY,currency_MXN,currency_NOK,currency_NZD,currency_SEK,currency_SGD,currency_USD
0,"pulsing orange, skinny youth",62,0,False,1.0,1241713622,False,1970-01-01 00:00:01.575475385,3885.0,True,False,successful,1970-01-01 00:00:01.578707400,film & video,1,0,0,0,0,0,0,0,0,0,0,0,0,1
1,Help Tony Copeland and get free cd's and mp3's,0,0,False,1.0,1738971673,False,1970-01-01 00:00:01.276283655,0.0,False,False,failed,1970-01-01 00:00:01.280206809,music,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,Public Screen Printing at the 2013 Presidentia...,20,0,False,1.0,627504848,False,1970-01-01 00:00:01.352868237,1102.0,True,False,successful,1970-01-01 00:00:01.355608832,art,1,0,0,0,0,0,0,0,0,0,0,0,0,1
3,Help built your magical rainsforest holiday st...,1,0,False,1.0,584673239,False,1970-01-01 00:00:01.467250090,1.0,False,False,failed,1970-01-01 00:00:01.469842020,technology,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,B. collective: locally made fine art and gifts,3,0,False,1.0,2126450463,False,1970-01-01 00:00:01.549959746,5.0,False,False,failed,1970-01-01 00:00:01.552548146,art,0,0,0,0,0,0,0,0,0,0,0,0,0,1
