In [3]:
import pandas as pd
from pathlib import Path

In [4]:
# The path to our CSV file
csvpath = Path("Resources/Crowdfunding_data.csv")
# Read our Crowdfunding data into pandas
crowdfund_df = pd.read_csv(csvpath)
crowdfund_df.head()

Unnamed: 0,id,name,blurb,goal,pledged,outcome,backers_count,country,currency,launched_at,deadline,staff_pick,spotlight,category
0,0,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100,0,failed,0,CA,CAD,1448690400,1450159200,False,False,food/food trucks
1,1,Odom Inc,Managed bottom-line architecture,1400,14560,successful,158,US,USD,1408424400,1408597200,False,True,music/rock
2,2,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400,142523,successful,1425,AU,AUD,1384668000,1384840800,False,False,technology/web
3,3,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200,2477,failed,24,US,USD,1565499600,1568955600,False,False,music/rock
4,4,Larson-Little,Proactive foreground core,7600,5265,failed,53,US,USD,1547964000,1548309600,False,False,theater/plays


In [6]:
# Get a list of all of our columns for easy reference
crowdfund_df.columns

Index(['id', 'name', 'blurb', 'goal', 'pledged', 'outcome', 'backers_count',
       'country', 'currency', 'launched_at', 'deadline', 'staff_pick',
       'spotlight', 'category'],
      dtype='object')

In [9]:
# Extract "name", "goal", "pledged", "outcome", "country", "staff_pick",
# "backers_count", and "spotlight"
reduce_df = crowdfund_df[[ "name", "goal", "pledged", "outcome", "country", "staff_pick",
"backers_count","spotlight"]]
reduce_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
0,"Baldwin, Riley and Jackson",100,0,failed,CA,False,0,False
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
...,...,...,...,...,...,...,...,...
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True
996,Butler LLC,6600,4814,failed,US,False,112,False
997,Ball LLC,7600,4603,canceled,IT,False,139,False
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True


In [15]:
# Remove projects that made no money at all
reduce_df = reduce_df.loc[reduce_df["pledged"] > 0]
reduce_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
5,Harris Group,7600,13195,successful,DK,False,174,False
...,...,...,...,...,...,...,...,...
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True
996,Butler LLC,6600,4814,failed,US,False,112,False
997,Ball LLC,7600,4603,canceled,IT,False,139,False
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True


In [31]:
# Collect only those projects that were hosted in the US.

us_df = reduce_df.loc[reduce_df["country"] == "US"]
# Create a list of the column

#  Create a new df for "US" with the columns. 
us_df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,US,False,158,True
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
9,"Rangel, Holt and Jones",6200,3208,failed,US,False,44,False
10,Green Ltd,5200,13838,successful,US,False,220,False


In [32]:
# Create a new column that finds the average amount pledged to a project
us_df.loc[:,"Average Amount"] = us_df.loc[:,"pledged"] / us_df.loc[:,"backers_count"]
us_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  us_df.loc[:,"Average Amount"] = us_df.loc[:,"pledged"] / us_df.loc[:,"backers_count"]


Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,Average Amount
1,Odom Inc,1400,14560,successful,US,False,158,True,92.151899
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False,103.208333
4,Larson-Little,7600,5265,failed,US,False,53,False,99.339623
9,"Rangel, Holt and Jones",6200,3208,failed,US,False,44,False,72.909091
10,Green Ltd,5200,13838,successful,US,False,220,False,62.9


In [9]:
# First convert "average_donation", "goal", and "pledged" columns to float
# Then Format to go to two decimal places, include a dollar sign, and use comma notation

us_df["Average Amount"] = us_df["Average Amount"].astype(float).map("${:,.2f}".format)

# Display the DataFrame


Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
1,Odom Inc,"$1,400.00","$14,560.00",successful,US,False,158,True,$92.15
3,"Mcdonald, Gonzalez and Ross","$4,200.00","$2,477.00",failed,US,False,24,False,$103.21
4,Larson-Little,"$7,600.00","$5,265.00",failed,US,False,53,False,$99.34
9,"Rangel, Holt and Jones","$6,200.00","$3,208.00",failed,US,False,44,False,$72.91
10,Green Ltd,"$5,200.00","$13,838.00",successful,US,False,220,False,$62.90


In [10]:
# Calculate the total number of backers for all US projects
us_df["backers_count"].sum()

545510

In [11]:
# Calculate the average number of backers for all US projects
us_df["backers_count"].mean()

715.8923884514436

In [27]:
# Collect only those US campaigns that have been picked as a "Staff Pick"
us_df = reduce_df.loc[reduce_df["staff_pick"] == True]
us_df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
76,"Martin, Conway and Larsen",122900,95993,failed,US,True,1684,True
86,Davis-Smith,7400,12405,successful,US,True,203,False
123,Edwards-Lewis,177700,33092,failed,CA,True,662,False
193,"Calhoun, Rogers and Long",6600,3012,failed,US,True,65,False
205,Weaver-Marquez,1300,5614,successful,US,True,80,False


In [29]:
# Group by the outcome of the campaigns and see if staff picks matter (Seems to matter quite a bit)
outcome_df = us_df.groupby("outcome")
outcome_df["name"].count()

outcome
canceled       4
failed        17
successful    28
Name: name, dtype: int64