In [1]:
import pandas as pd
import json
import datetime as dt
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Single VADER analyzer instance, reused to score every campaign blurb below.
analyzer = SentimentIntensityAnalyzer()

In [2]:
# Load every raw Kickstarter export (raw_data/Kickstarter000.csv through
# raw_data/Kickstarter048.csv) and stack them into one frame.
N_RAW_FILES = 49

# Read each sheet once and concatenate a single time. Calling pd.concat inside
# the loop re-copies every previously loaded row on each iteration, and seeding
# the accumulation with an empty DataFrame is unnecessary.
raw_sheets = [
    pd.read_csv(f'raw_data/Kickstarter{i:03d}.csv')  # :03d zero-pads to 3 digits
    for i in range(N_RAW_FILES)
]
campaigns_df = pd.concat(raw_sheets)

# Replace the repeated per-file row labels with a unique 0..n-1 index so rows
# can be addressed unambiguously. reset_index() without drop=True keeps the old
# per-file labels in an 'index' column, which is preserved here on purpose.
campaigns_df = campaigns_df.reset_index()

In [12]:
# Per-campaign values derived from the JSON-encoded and epoch-second columns of
# campaigns_df. Every list ends up with one entry per row, in row order.
cat_names = []
cat_ids = []
parent_cat_ids = []
creator_names = []
creator_profiles = []
start_dates = []
deadlines = []
campaign_lengths = []
states = []

SECONDS_PER_DAY = 86400
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

# Walk the needed columns in lockstep instead of doing a label lookup
# (campaigns_df[col][i]) for every field of every row. Iterating the underlying
# arrays keeps the numeric values as numpy scalars, so the arithmetic and
# rounding below behave exactly as they do with per-element Series access.
campaign_rows = zip(
    campaigns_df['category'].values,
    campaigns_df['creator'].values,
    campaigns_df['launched_at'].values,
    campaigns_df['deadline'].values,
    campaigns_df['location'].values,
)

for category_json, creator_json, start_date_ts, deadline_ts, location_json in campaign_rows:
    # Category: id and name are required; categories without a 'parent_id'
    # key get the string placeholder '0'.
    cat = json.loads(category_json)
    cat_ids.append(cat['id'])
    cat_names.append(cat['name'])
    parent_cat_ids.append(cat.get('parent_id', '0'))

    # Creator: display name and the URL of their profile page.
    creator = json.loads(creator_json)
    creator_profiles.append(creator['urls']['web']['user'])
    creator_names.append(creator['name'])

    # Dates: both timestamps are epoch seconds; the campaign length is the gap
    # between them in days, rounded to two decimals.
    # NOTE: fromtimestamp() without a tz argument renders in the machine's
    # local timezone, so the formatted strings depend on where this runs.
    campaign_lengths.append(round((deadline_ts - start_date_ts) / SECONDS_PER_DAY, 2))
    start_dates.append(dt.datetime.fromtimestamp(int(start_date_ts)).strftime(TIMESTAMP_FORMAT))
    deadlines.append(dt.datetime.fromtimestamp(int(deadline_ts)).strftime(TIMESTAMP_FORMAT))

    # Location: a missing location (non-string value -> TypeError from
    # json.loads) or a location without a 'state' key is recorded as 'na'.
    try:
        state = json.loads(location_json)['state']
    except (KeyError, TypeError):
        state = 'na'
    states.append(state)

In [13]:
# Attach the per-row lists built by the extraction loop to campaigns_df.
# NOTE: 'deadline' already exists as an epoch-seconds column and is replaced
# here by its formatted string form, so the extraction loop (which subtracts
# 'launched_at' from 'deadline') cannot be re-run after this cell without
# reloading the data first.
derived_columns = {
    'category_id': cat_ids,
    'category_name': cat_names,
    'parent_category_id': parent_cat_ids,
    'creator_profile': creator_profiles,
    'creator_name': creator_names,
    'start_date': start_dates,
    'deadline': deadlines,
    'campaign_length': campaign_lengths,
    'state/province': states,
}

for column_name, column_values in derived_columns.items():
    campaigns_df[column_name] = column_values

In [14]:
# Build the working feature table: a copy of campaigns_df without the raw JSON
# columns that were already unpacked and without the fields that are not
# carried forward.
features_of_interest = campaigns_df.drop(
    columns=[
        'category',
        'converted_pledged_amount',
        'created_at',
        'creator',
        'currency_symbol',
        'currency_trailing_code',
        'current_currency',
        'disable_communication',
        'is_starrable',
        'launched_at',
        'photo',
        'profile',
        'source_url',
        'spotlight',
        'staff_pick',
        'state_changed_at',
        'static_usd_rate',
        'urls',
        'usd_pledged',
        'usd_type',
        'location',
        'friends',
        'is_backing',
        'is_starred',
        'permissions',
    ]
)

In [15]:
# Preview the first rows of the trimmed feature table.
features_of_interest.head()

Unnamed: 0,index,backers_count,blurb,country,currency,deadline,fx_rate,goal,id,name,...,slug,state,category_id,category_name,parent_category_id,creator_profile,creator_name,start_date,campaign_length,state/province
0,0,80,I will be an artist-in-residence at Elsewhere ...,US,USD,2012-04-19 15:16:00,1.0,2800.0,1562040083,Elsewhere Studios artist-in-residency program!,...,elsewhere-studios-artist-in-residency-program,successful,54,Mixed Media,1,https://www.kickstarter.com/profile/hilaryemer...,Hilary Emerson Lay,2012-03-28 16:14:20,21.96,CO
1,1,47,We are looking to bring a Visiting Sculptor fr...,US,USD,2012-04-20 17:06:38,1.0,3900.0,1437561817,Martin Luther King Jr. Sculpture on Campus!,...,martin-luther-king-jr-sculpture-on-campus,successful,1,Art,0,https://www.kickstarter.com/profile/1536850207,Csub Arts Humanities Matter,2012-03-28 17:06:38,23.0,CA
2,2,80,Surrealistic oil paintings capturing the metam...,US,USD,2012-04-16 22:59:00,1.0,750.0,574125813,EMERGENCE: Surreal Oil Paintings by J.J. Long,...,emergence-surreal-oil-paintings-by-jj-long,successful,23,Painting,1,https://www.kickstarter.com/profile/2039713907,J.J. Long,2012-03-28 18:01:19,19.21,MA
3,3,82,1000 Artists is a public art-making installati...,US,USD,2012-05-07 20:22:25,1.0,4500.0,858990141,1000 Artists: Presidential Inauguration 2013,...,1000-artists-presidential-inauguration-2013,successful,53,Public Art,1,https://www.kickstarter.com/profile/17146650,Andrew Purchin,2012-03-28 20:22:25,40.0,DC
4,4,31,P.M.A.F.T.W. my upcoming solo show June 2012 a...,US,USD,2012-04-02 21:57:23,1.0,1000.0,566704999,P.M.A.F.T.W.,...,pmaftw,successful,23,Painting,1,https://www.kickstarter.com/profile/1442314751,Ryan Jacob Smith,2012-03-28 21:57:23,5.0,OR


In [16]:
# Map each category id to the first name seen for it. setdefault keeps the
# first occurrence and makes the membership check a constant-time dict lookup
# instead of a scan over a growing list for every row.
cat_dic = {}
for cat_id, cat_name in zip(cat_ids, cat_names):
    cat_dic.setdefault(cat_id, cat_name)

# Distinct ids and their names in first-seen order, kept as separate lists for
# any code that still refers to them.
cat_id_list = list(cat_dic)
cat_name_list = list(cat_dic.values())

# Resolve every row's parent category id to a name. Ids with no entry in
# cat_dic (such as the '0' placeholder given to categories that have no
# parent_id) are labelled 'none'.
parent_cat_names = [cat_dic.get(parent_id, 'none') for parent_id in parent_cat_ids]

features_of_interest['parent_category'] = parent_cat_names

In [17]:
def _blurb_features(text):
    """Return (character count, VADER 'compound' score) for one blurb.

    A TypeError raised while measuring or scoring the value (for example when
    the blurb is missing and therefore not a string) yields (0, 0).
    """
    try:
        return len(text), analyzer.polarity_scores(text)['compound']
    except TypeError:
        return 0, 0


blurbs = features_of_interest["blurb"]
blurb_lengths = []
blurb_sentiments = []

# Score each blurb once and split the pairs into the two row-aligned lists.
for char_count, compound_score in map(_blurb_features, blurbs):
    blurb_sentiments.append(compound_score)
    blurb_lengths.append(char_count)

features_of_interest['blurb_length'] = blurb_lengths
features_of_interest['blurb_sentiment'] = blurb_sentiments

In [18]:
# Preview the feature table again, now including the parent_category,
# blurb_length and blurb_sentiment columns.
features_of_interest.head()

Unnamed: 0,index,backers_count,blurb,country,currency,deadline,fx_rate,goal,id,name,...,category_name,parent_category_id,creator_profile,creator_name,start_date,campaign_length,state/province,parent_category,blurb_length,blurb_sentiment
0,0,80,I will be an artist-in-residence at Elsewhere ...,US,USD,2012-04-19 15:16:00,1.0,2800.0,1562040083,Elsewhere Studios artist-in-residency program!,...,Mixed Media,1,https://www.kickstarter.com/profile/hilaryemer...,Hilary Emerson Lay,2012-03-28 16:14:20,21.96,CO,Art,118,0.0
1,1,47,We are looking to bring a Visiting Sculptor fr...,US,USD,2012-04-20 17:06:38,1.0,3900.0,1437561817,Martin Luther King Jr. Sculpture on Campus!,...,Art,0,https://www.kickstarter.com/profile/1536850207,Csub Arts Humanities Matter,2012-03-28 17:06:38,23.0,CA,none,135,0.2732
2,2,80,Surrealistic oil paintings capturing the metam...,US,USD,2012-04-16 22:59:00,1.0,750.0,574125813,EMERGENCE: Surreal Oil Paintings by J.J. Long,...,Painting,1,https://www.kickstarter.com/profile/2039713907,J.J. Long,2012-03-28 18:01:19,19.21,MA,Art,90,0.0
3,3,82,1000 Artists is a public art-making installati...,US,USD,2012-05-07 20:22:25,1.0,4500.0,858990141,1000 Artists: Presidential Inauguration 2013,...,Public Art,1,https://www.kickstarter.com/profile/17146650,Andrew Purchin,2012-03-28 20:22:25,40.0,DC,Art,117,0.0
4,4,31,P.M.A.F.T.W. my upcoming solo show June 2012 a...,US,USD,2012-04-02 21:57:23,1.0,1000.0,566704999,P.M.A.F.T.W.,...,Painting,1,https://www.kickstarter.com/profile/1442314751,Ryan Jacob Smith,2012-03-28 21:57:23,5.0,OR,Art,61,0.0
