In [31]:
import pandas as pd
import os

In [32]:
# Location of the Kickstarter export, relative to the notebook's working directory
file_to_load = "KickstarterData.csv"
# Load the raw campaign data into a DataFrame
kickstarter_df = pd.read_csv(file_to_load)

In [35]:
# Keep the column labels of the raw data handy for reference.
# Note: this is a pandas Index object, not a plain Python list.
df_columns = kickstarter_df.columns

In [37]:
# Narrow the raw data down to the eight columns used in this analysis
columns_to_keep = [
    "name", "goal", "pledged", "state",
    "country", "staff_pick", "backers_count", "spotlight",
]
new_df = kickstarter_df.loc[:, columns_to_keep]
new_df.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True


In [38]:
# Keep only the campaigns that raised some money (pledged amount above zero)
raised_money = new_df["pledged"] > 0
new_df = new_df.loc[raised_money]
new_df.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True


In [39]:
# Keep only the campaigns hosted in the US.
# The explicit .copy() gives later cells an independent frame to add and
# overwrite columns on, instead of a filtered selection of the previous frame
# (the pattern pandas flags with SettingWithCopyWarning).
new_df = new_df.loc[new_df["country"] == "US"].copy()

In [40]:
# Average pledge per backer: total pledged divided by the number of backers
new_df["Average Pledged"] = new_df["pledged"].div(new_df["backers_count"])

In [41]:
# Format the money columns for display: cast to float, then render with a
# dollar sign, comma thousands separators, and two decimal places.
# The formatted average is written back to "Average Pledged" (the column
# created above) rather than to a second, differently-cased "average pledged"
# column, so the frame keeps a single average column.
# NOTE: after this cell the three columns hold strings, not numbers. Do any
# numeric work on them before this point, and do not re-run this cell on its
# own: astype(float) cannot parse values such as "$1,500.00".
currency_format = "${:,.2f}".format
for column in ["Average Pledged", "goal", "pledged"]:
    new_df[column] = new_df[column].astype(float).map(currency_format)

In [42]:
# Total number of backers across every remaining US campaign
us_backer_counts = new_df["backers_count"]
us_backer_counts.sum()

89273

In [43]:
# Mean number of backers per remaining US campaign
us_backer_counts = new_df["backers_count"]
us_backer_counts.mean()

41.931892907468296

In [44]:
# Narrow the US campaigns down to those flagged as a "Staff Pick"
is_staff_pick = new_df["staff_pick"].eq(True)
new_staff_pick_df = new_df.loc[is_staff_pick]

In [45]:
# Bucket the staff-picked campaigns by their final state and count the
# non-null entries in every column, to judge whether being a staff pick matters
# (the original author's note: it seems to matter quite a bit)
staff_picks_by_state = new_staff_pick_df.groupby("state")
new_state_df = staff_picks_by_state.count()