In [1]:
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# The path to our CSV file
path = "KickstarterData.csv"

# Read our Kickstarter data into pandas
kickstarter_df = pd.read_csv(path)
kickstarter_df.head()

Unnamed: 0,id,photo,name,blurb,goal,pledged,state,slug,disable_communication,country,...,location,category,profile,spotlight,urls,source_url,friends,is_starred,is_backing,permissions
0,1645666704,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",The Class Act Players Theatre Company Presents...,The Class Act Players put on another one of th...,1500.0,2925.0,successful,the-class-act-players-theatre-company-presents...,False,US,...,"{""country"":""US"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,
1,874638240,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,"A brand new play about love and entitlement, b...",2500.0,2936.0,successful,mr-incredible-by-camilla-whitehill-vault-festival,False,GB,...,"{""country"":""GB"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,
2,247074984,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",RUN,Yonni's pissed off in a world filled with scho...,1000.0,1200.0,successful,run-10,False,GB,...,"{""country"":""GB"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,
3,1941196813,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",9th International Meeting of Youth Theatre sap...,27. April bis 1. Mai 2016 in Brixen/Südtirol/I...,2000.0,2135.0,successful,9th-international-meeting-of-youth-theatre-sap...,False,IT,...,"{""country"":""IT"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,
4,421961595,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",Get Conti to the Ed Fringe!,The Italia Conti 2nd years are going to Ed Fri...,1000.0,1250.0,successful,get-conti-to-the-ed-fringe,False,GB,...,"{""country"":""GB"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,


In [3]:
# Get a list of all of our columns for easy reference
kickstarter_df.columns

Index(['id', 'photo', 'name', 'blurb', 'goal', 'pledged', 'state', 'slug',
       'disable_communication', 'country', 'currency', 'currency_symbol',
       'currency_trailing_code', 'deadline', 'state_changed_at', 'created_at',
       'launched_at', 'staff_pick', 'is_starrable', 'backers_count',
       'static_usd_rate', 'usd_pledged', 'creator', 'location', 'category',
       'profile', 'spotlight', 'urls', 'source_url', 'friends', 'is_starred',
       'is_backing', 'permissions'],
      dtype='object')

In [4]:
# Extract "name", "goal", "pledged", "state", "country", "staff_pick",
# "backers_count", and "spotlight"
sliced_kickstarter_df = kickstarter_df[["name", "goal", "pledged", "state", "country", "staff_pick",
                                        "backers_count", "spotlight"]]
sliced_kickstarter_df.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True


In [5]:
# Remove projects that made no money at all
profitable_df = sliced_kickstarter_df.loc[sliced_kickstarter_df["pledged"] > 0]
profitable_df.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True


In [6]:
# Collect only those projects that were hosted in the US
# Create a list of the columns
# Create a new df for "US" with the columns above.
us_profitable_df = profitable_df.loc[profitable_df["country"] == "US"]
us_profitable_df.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True
14,Pride Con,15000.0,15110.0,successful,US,False,60,True
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True


In [7]:
# Create a new column that finds the average amount pledged to a project
us_profitable_df["average_donation"] = us_profitable_df.loc[:,"pledged"]/us_profitable_df.loc[:,"backers_count"]
us_profitable_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True,172.058824
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True,106.323529
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True,76.537879
14,Pride Con,15000.0,15110.0,successful,US,False,60,True,251.833333
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True,128.825


In [8]:
# First convert "average_donation", "goal", and "pledged" columns to float
# pd.to_numeric(us_profitable_df["average_donation"])
# pd.to_numeric(us_profitable_df["goal"])
# pd.to_numeric(us_profitable_df["pledged"])


# Then Format to go to two decimal places, include a dollar sign, and use comma notation
us_profitable_df["average_donation"] = us_profitable_df["average_donation"].map("${:.2f}".format)
us_profitable_df["goal"] = us_profitable_df["goal"].map("${:.2f}".format)
us_profitable_df["pledged"] = us_profitable_df["pledged"].map("${:.2f}".format)
us_profitable_df.dtypes
us_profitable_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


name                object
goal                object
pledged             object
state               object
country             object
staff_pick            bool
backers_count        int64
spotlight             bool
average_donation    object
dtype: object

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
0,The Class Act Players Theatre Company Presents...,$1500.00,$2925.00,successful,US,False,17,True,$172.06
8,Forefront Festival 2015,$7200.00,$7230.00,successful,US,False,68,True,$106.32
11,Hamlet the Hip-Hopera,$9747.00,$10103.00,successful,US,True,132,True,$76.54
14,Pride Con,$15000.00,$15110.00,successful,US,False,60,True,$251.83
15,En Garde Arts Emerging Artists Festival BOSSS,$10000.00,$10306.00,successful,US,True,80,True,$128.82
17,"My Life in Michaels, a short play",$1200.00,$1265.00,successful,US,False,19,True,$66.58
19,MSGA Junior Theater Festival 2016,$1000.00,$1300.00,successful,US,False,17,True,$76.47
20,A Kingdom for a Stage! - Much Ado...,$5000.00,$6475.00,successful,US,False,105,True,$61.67
31,Bartleby: a new musical at the MN Fringe,$2000.00,$2625.00,successful,US,False,25,True,$105.00
32,"The Point, a show in the Women's Voices Theate...",$3150.00,$3150.00,successful,US,False,22,True,$143.18


In [13]:
# Calculate the total number of backers for all US projects
total_us_backers = us_profitable_df["backers_count"].sum()
total_us_backers

89273

In [16]:
# Calculate the average number of backers for all US projects
avg_us_backers = us_profitable_df["backers_count"].mean()
round(avg_us_backers, 2)

41.93

In [18]:
# Collect only those US campaigns that have been picked as a "Staff Pick"
staff_pick_us_profitable = us_profitable_df.loc[us_profitable_df["staff_pick"] == True]
staff_pick_us_profitable.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
11,Hamlet the Hip-Hopera,$9747.00,$10103.00,successful,US,True,132,True,$76.54
15,En Garde Arts Emerging Artists Festival BOSSS,$10000.00,$10306.00,successful,US,True,80,True,$128.82
39,"""Poor People"" at FringeNYC 2015",$5500.00,$5682.00,successful,US,True,34,True,$167.12
44,Queen Mab's Steampunk and Fairie Street Festival,$1300.00,$3363.00,successful,US,True,62,True,$54.24
45,RAFT: a new play by Emily Kitchens,$7500.00,$7826.00,successful,US,True,120,True,$65.22


In [25]:
# Group by the state of the campaigns and see if staff picks matter (Seems to matter quite a bit)
grouped_staff_pick = staff_pick_us_profitable.groupby("state")
grouped_staff_pick["goal"].count()

state
canceled        6
failed         21
live            2
successful    145
Name: goal, dtype: int64