In [1]:
import pandas as pd

In [2]:
# Location of the crowdfunding CSV, relative to this notebook
file = "Resources/CrowdfundingData.csv"

# Load the CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer=file)
df.head(n=5)


Unnamed: 0,id,name,blurb,goal,pledged,outcome,backers_count,country,currency,launched_at,deadline,staff_pick,spotlight,category
0,0,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100,0,failed,0,CA,CAD,1448690400,1450159200,False,False,food/food trucks
1,1,Odom Inc,Managed bottom-line architecture,1400,14560,successful,158,US,USD,1408424400,1408597200,False,True,music/rock
2,2,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400,142523,successful,1425,AU,AUD,1384668000,1384840800,False,False,technology/web
3,3,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200,2477,failed,24,US,USD,1565499600,1568955600,False,False,music/rock
4,4,Larson-Little,Proactive foreground core,7600,5265,failed,53,US,USD,1547964000,1548309600,False,False,theater/plays


In [3]:
# Show the column labels of df so later cells can refer to them by name
df.columns

Index(['id', 'name', 'blurb', 'goal', 'pledged', 'outcome', 'backers_count',
       'country', 'currency', 'launched_at', 'deadline', 'staff_pick',
       'spotlight', 'category'],
      dtype='object')

In [4]:
# Narrow df to the eight columns used in the rest of the analysis;
# every row is kept, only the column set shrinks.
reduced_crowdfunding_df = df.loc[
    :,
    [
        "name",
        "goal",
        "pledged",
        "outcome",
        "country",
        "staff_pick",
        "backers_count",
        "spotlight",
    ],
]
reduced_crowdfunding_df


Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
0,"Baldwin, Riley and Jackson",100,0,failed,CA,False,0,False
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
...,...,...,...,...,...,...,...,...
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True
996,Butler LLC,6600,4814,failed,US,False,112,False
997,Ball LLC,7600,4603,canceled,IT,False,139,False
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True


In [7]:
# Drop campaigns with nothing pledged: keep rows whose "pledged" is above zero
has_pledges = reduced_crowdfunding_df["pledged"].gt(0)
reduced_crowdfunding_df = reduced_crowdfunding_df.loc[has_pledges]
reduced_crowdfunding_df

SyntaxError: invalid syntax (4154326878.py, line 3)

In [11]:
# Columns to keep for the US-only view. "country" is left out because every
# row selected below has the same value for it.
columns = ["name", "goal", "pledged", "outcome", "staff_pick", "backers_count", "spotlight"]

# Select the rows whose country is "US", restricted to the columns above.
# .copy() makes the result an independent DataFrame, so the column
# assignments in later cells modify it directly and do not raise
# SettingWithCopyWarning.
us_crowdfunding_df = reduced_crowdfunding_df.loc[
    reduced_crowdfunding_df["country"] == "US", columns
].copy()
us_crowdfunding_df.head()

Unnamed: 0,name,goal,pledged,outcome,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,False,158,True
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,False,24,False
4,Larson-Little,7600,5265,failed,False,53,False
9,"Rangel, Holt and Jones",6200,3208,failed,False,44,False
10,Green Ltd,5200,13838,successful,False,220,False


In [12]:
# Add "average_donation": the amount pledged per backer for each campaign.
# .assign returns a new DataFrame instead of writing a column into a frame
# that was sliced out of another one (which can raise SettingWithCopyWarning).
us_crowdfunding_df = us_crowdfunding_df.assign(
    average_donation=us_crowdfunding_df["pledged"] / us_crowdfunding_df["backers_count"]
)
us_crowdfunding_df.head()

Unnamed: 0,name,goal,pledged,outcome,staff_pick,backers_count,spotlight,average_donation
1,Odom Inc,1400,14560,successful,False,158,True,92.151899
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,False,24,False,103.208333
4,Larson-Little,7600,5265,failed,False,53,False,99.339623
9,"Rangel, Holt and Jones",6200,3208,failed,False,44,False,72.909091
10,Green Ltd,5200,13838,successful,False,220,False,62.9


In [13]:
# Convert "average_donation", "goal", and "pledged" to float, then render them
# as currency strings: dollar sign, comma thousands separator, two decimals.
#
# Only columns that are still numeric are formatted. After this cell has run
# once the columns hold strings, and applying a ",.2f" format to a string
# raises ValueError, so the dtype guard makes re-running the cell a no-op.
# NOTE: after this cell these three columns are text; do any numeric work on
# them before this point.
currency_columns = ["average_donation", "goal", "pledged"]

us_crowdfunding_df = us_crowdfunding_df.assign(
    **{
        column: us_crowdfunding_df[column].astype(float).map("${:,.2f}".format)
        for column in currency_columns
        if pd.api.types.is_numeric_dtype(us_crowdfunding_df[column])
    }
)

In [14]:
# Preview the first rows of us_crowdfunding_df after the currency formatting
us_crowdfunding_df.head()

Unnamed: 0,name,goal,pledged,outcome,staff_pick,backers_count,spotlight,average_donation
1,Odom Inc,"$1,400.00","$14,560.00",successful,False,158,True,$92.15
3,"Mcdonald, Gonzalez and Ross","$4,200.00","$2,477.00",failed,False,24,False,$103.21
4,Larson-Little,"$7,600.00","$5,265.00",failed,False,53,False,$99.34
9,"Rangel, Holt and Jones","$6,200.00","$3,208.00",failed,False,44,False,$72.91
10,Green Ltd,"$5,200.00","$13,838.00",successful,False,220,False,$62.90


In [15]:
# Sum of "backers_count" across every US campaign
us_total_backers = us_crowdfunding_df.loc[:, "backers_count"].sum()
us_total_backers

545510

In [16]:
# Mean of "backers_count" across every US campaign
us_average_backers = us_crowdfunding_df.loc[:, "backers_count"].mean()
us_average_backers

715.8923884514436

In [17]:
# Keep only the US campaigns flagged as a "Staff Pick".
# The comparison against True is kept explicit (rather than using the column
# itself as the mask) so the selection behaves the same as an `== True` test.
is_staff_pick = us_crowdfunding_df["staff_pick"].eq(True)
staff_picks_df = us_crowdfunding_df.loc[is_staff_pick]
staff_picks_df.head()

Unnamed: 0,name,goal,pledged,outcome,staff_pick,backers_count,spotlight,average_donation
76,"Martin, Conway and Larsen","$122,900.00","$95,993.00",failed,True,1684,True,$57.00
86,Davis-Smith,"$7,400.00","$12,405.00",successful,True,203,False,$61.11
193,"Calhoun, Rogers and Long","$6,600.00","$3,012.00",failed,True,65,False,$46.34
205,Weaver-Marquez,"$1,300.00","$5,614.00",successful,True,80,False,$70.17
220,Owens-Le,"$7,900.00",$667.00,failed,True,17,False,$39.24


In [19]:
# Bucket the staff-picked US campaigns by outcome and print how many fall in
# each bucket, to see whether being a staff pick matters
# (the author's reading of the counts: it seems to matter quite a bit).
stackpick_groups = staff_picks_df.groupby("outcome")
outcome_counts = stackpick_groups["name"].count()
print(outcome_counts)

outcome
canceled       3
failed        13
successful    23
Name: name, dtype: int64
