In [1]:
import pandas as pd

In [2]:
# The path to our CSV file
data_to_load = "KickstarterData.csv"
# Read our Kickstarter data into pandas

In [3]:
# Get a list of all of our columns for easy reference
kickstarter_df = pd.read_csv(data_to_load)
kickstarter_df.columns

Index(['id', 'photo', 'name', 'blurb', 'goal', 'pledged', 'state', 'slug',
       'disable_communication', 'country', 'currency', 'currency_symbol',
       'currency_trailing_code', 'deadline', 'state_changed_at', 'created_at',
       'launched_at', 'staff_pick', 'is_starrable', 'backers_count',
       'static_usd_rate', 'usd_pledged', 'creator', 'location', 'category',
       'profile', 'spotlight', 'urls', 'source_url', 'friends', 'is_starred',
       'is_backing', 'permissions'],
      dtype='object')

In [4]:
# Extract "name", "goal", "pledged", "state", "country", "staff_pick",
# "backers_count", and "spotlight"

name_df=kickstarter_df[["name","goal","pledged", "state", "country", "staff_pick","backers_count","spotlight"]]
name_df

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True
...,...,...,...,...,...,...,...,...
4115,Bring Love's Labour's Lost to Minnesota,25000.0,25388.0,successful,US,True,213,True
4116,Score,300.0,340.0,successful,GB,False,17,True
4117,"""The Santaland Diaries"" by David Sedaris in Lo...",6000.0,7140.0,successful,US,False,108,True
4118,REBATEnsemble Presents: ICONS - The Martin Show,300.0,312.0,successful,US,False,6,True


In [5]:
# Remove projects that made no money at all

money_made = name_df[name_df["pledged"] > 0]
money_made.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True


In [6]:
# Collect only those projects that were hosted in the US
US_money=name_df[name_df["country"] =="US"]
US_money
# Create a list of the columns
# Create a new df for "US" with the columns above. 

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True
14,Pride Con,15000.0,15110.0,successful,US,False,60,True
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True
...,...,...,...,...,...,...,...,...
4109,Going To Market,2000.0,2100.0,successful,US,False,41,True
4110,LIBERTY! EQUALITY! AND FIREWORKS!... A Civil R...,3000.0,3506.0,successful,US,False,35,True
4115,Bring Love's Labour's Lost to Minnesota,25000.0,25388.0,successful,US,True,213,True
4117,"""The Santaland Diaries"" by David Sedaris in Lo...",6000.0,7140.0,successful,US,False,108,True


In [7]:
# Create a new column that finds the average amount pledged to a project

US_money["average pledge"]=US_money["pledged"]/US_money["backers_count"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [13]:
US_money
# First convert "average_donation", "goal", and "pledged" columns to float
# Then Format to go to two decimal places, include a dollar sign, and use comma notation

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average pledge
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True,172.058824
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True,106.323529
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True,76.537879
14,Pride Con,15000.0,15110.0,successful,US,False,60,True,251.833333
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True,128.825000
...,...,...,...,...,...,...,...,...,...
4109,Going To Market,2000.0,2100.0,successful,US,False,41,True,51.219512
4110,LIBERTY! EQUALITY! AND FIREWORKS!... A Civil R...,3000.0,3506.0,successful,US,False,35,True,100.171429
4115,Bring Love's Labour's Lost to Minnesota,25000.0,25388.0,successful,US,True,213,True,119.192488
4117,"""The Santaland Diaries"" by David Sedaris in Lo...",6000.0,7140.0,successful,US,False,108,True,66.111111


In [9]:
# Calculate the total number of backers for all US projects

In [10]:
# Calculate the average number of backers for all US projects

In [11]:
# Collect only those US campaigns that have been picked as a "Staff Pick"

In [12]:
# Group by the state of the campaigns and see if staff picks matter (Seems to matter quite a bit)