#### Setup:

In [None]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import json
from IPython.display import clear_output

#### Import Berkeley Seed Dataset:

In [None]:
# Load the cleaned Berkeley seed dataset; the first CSV column becomes the index
berkeley_csv_path = "02_Data Collection/Kickstarter_Berkeley_Cleaned.csv"
kickstarter_berkeley_df = pd.read_csv(berkeley_csv_path, index_col=0)

# Report (rows, columns) and preview the first rows
print(kickstarter_berkeley_df.shape)
kickstarter_berkeley_df.head()

(366907, 43)


Unnamed: 0_level_0,url_project,name,blurb,state,goal,pledged,backers_count,comments_count,updates_count,staff_pick,...,creator_twitter_linked,creator_instagram_links,creator_instagram_linked,creator_youtube_links,creator_youtube_linked,creator_linkedin_links,creator_linkedin_linked,creator_location_name,creator_state,creator_country
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22810934,https://www.kickstarter.com/projects/cavinboun...,"Lights, Coma, Action!",A book to tell the story of who I was (LIGHTS)...,successful,15000.0,18167.88,284,4,40,False,...,False,,False,,False,,False,"Austin, TX",Texas,United States
22821161,https://www.kickstarter.com/projects/684178251...,Sentio Golf Putters: Feel IS the difference,Choose the feel YOU want with our patented flo...,failed,50000.0,9707.0,41,0,4,False,...,False,,False,,False,,False,"Acton, MA",Massachusetts,United States
22823613,https://www.kickstarter.com/projects/123754320...,Brainade,We intend to create an online platform for ind...,failed,750.0,1.0,1,0,0,False,...,False,,False,,False,,False,"Spearfish, SD",South Dakota,United States
22826244,https://www.kickstarter.com/projects/impkingco...,Horizon Anthology,HORIZON is a 144-page collection of fantasy co...,successful,7000.0,8955.41,286,20,16,True,...,False,,False,,False,,False,,,
22835897,https://www.kickstarter.com/projects/alphabets...,The Alphabet Story Children's Book,A richly illustrated children's book following...,successful,6000.0,6643.11,66,0,8,False,...,False,,False,,False,,False,"Portland, OR",Oregon,United States


#### Retrieve Additional Attributes via Kickstarter Graph API:

In [None]:
# Column layout of the frame that collects the Kickstarter Graph API results.
# Each name matches a key of the dict returned by makeKickstarterRequest.
GRAPH_COLUMNS = [
    # project attributes
    "graph_pid",
    "graph_name",
    "graph_blurb",
    "graph_state",
    "graph_percent_funded",
    "graph_backers_count",
    "graph_comments_count",
    "graph_url",
    "graph_project_image",
    "graph_location_name",
    "graph_location_state",
    "graph_location_country",
    "graph_story",
    "graph_risks",
    "graph_environmental_commitments",
    "graph_prelaunch_activated",
    "graph_is_watched",
    "graph_watches_count",
    "graph_created_at",
    "graph_launched_at",
    "graph_state_changed_at",
    "graph_canceled_at",
    "graph_deadline_at",
    "graph_subcategory",
    "graph_category",
    "graph_pledged_amount",
    "graph_pledged_currency",
    "graph_profile_blurb",
    "graph_profile_name",
    "graph_is_project_of_the_day",
    "graph_is_project_we_love",
    # creator attributes
    "graph_creator_verified_identity",
    "graph_creator_name",
    "graph_creator_url",
    "graph_creator_has_image",
    "graph_creator_image",
    "graph_creator_last_login",
    "graph_creator_biography",
    "graph_creator_is_facebook_connected",
    "graph_creator_allows_follows",
    "graph_creator_backings_count",
    "graph_creator_location_name",
    "graph_creator_location_state",
    "graph_creator_location_country",
    "graph_creator_launched_projects",
    "graph_creator_websites",
    "graph_creator_collaborators",
]

# Initialize an empty DataFrame for the Kickstarter Graph API results
kickstarter_graph_df = pd.DataFrame(columns=GRAPH_COLUMNS)

In [None]:
# GraphQL document requesting every project / creator attribute that is
# flattened into one row by _extract_graph_attributes.
_CAMPAIGN_QUERY = """
    query Campaign($pid: Int!) {
      project(pid: $pid) {
        pid
        name
        description
        state
        percentFunded
        backersCount
        commentsCount
        url
        imageUrl(width:300)
        location{
            displayableName
            state
            countryName
        }
        story
        risks
        environmentalCommitments{
            commitmentCategory
            description
        }
        prelaunchActivated
        isWatched
        watchesCount
        createdAt
        launchedAt
        stateChangedAt
        canceledAt
        deadlineAt
        category{
            name
            parentCategory{
                name
            }
        }
        pledged{
            amount
            currency
        }
        profile{
            blurb
            name
        }
        isProjectOfTheDay
        isProjectWeLove
        verifiedIdentity
        creator{
            name
            url
            hasImage
            imageUrl(width: 300)
            lastLogin
            biography
            isFacebookConnected
            allowsFollows
            backingsCount
            location{
                displayableName
                state
                countryName
            }
            launchedProjects{
                totalCount
            }
            websites{
                url
                domain
            }
        }
        collaborators{
            edges{
                title
            }
        }
      }
    }"""


def _extract_graph_attributes(project):
    """Flatten the ``project`` object of a Graph API response into one flat dict.

    Nested objects (location, category, pledged, profile, creator, ...) may be
    null in the response. Each one is replaced by an empty dict before its
    fields are read, so a missing object yields ``None`` values instead of
    raising.

    Parameters
    ----------
    project : dict
        The value found under ``result["data"]["project"]``.

    Returns
    -------
    dict
        Mapping of ``graph_*`` column names to the extracted values.
    """
    location = project.get("location") or {}
    category = project.get("category") or {}
    parent_category = category.get("parentCategory") or {}
    pledged = project.get("pledged") or {}
    profile = project.get("profile") or {}
    creator = project.get("creator") or {}
    creator_location = creator.get("location") or {}
    launched_projects = creator.get("launchedProjects") or {}
    collaborators = project.get("collaborators") or {}

    return {
        "graph_pid": project.get("pid"),
        "graph_name": project.get("name"),
        "graph_blurb": project.get("description"),
        "graph_state": project.get("state"),
        "graph_percent_funded": project.get("percentFunded"),
        "graph_backers_count": project.get("backersCount"),
        "graph_comments_count": project.get("commentsCount"),
        "graph_url": project.get("url"),
        "graph_project_image": project.get("imageUrl"),
        "graph_location_name": location.get("displayableName"),
        "graph_location_state": location.get("state"),
        "graph_location_country": location.get("countryName"),
        "graph_story": project.get("story"),
        "graph_risks": project.get("risks"),
        "graph_environmental_commitments": project.get("environmentalCommitments"),
        "graph_prelaunch_activated": project.get("prelaunchActivated"),
        "graph_is_watched": project.get("isWatched"),
        "graph_watches_count": project.get("watchesCount"),
        "graph_created_at": project.get("createdAt"),
        "graph_launched_at": project.get("launchedAt"),
        "graph_state_changed_at": project.get("stateChangedAt"),
        "graph_canceled_at": project.get("canceledAt"),
        "graph_deadline_at": project.get("deadlineAt"),
        "graph_subcategory": category.get("name"),
        "graph_category": parent_category.get("name"),
        "graph_pledged_amount": pledged.get("amount"),
        "graph_pledged_currency": pledged.get("currency"),
        "graph_profile_blurb": profile.get("blurb"),
        "graph_profile_name": profile.get("name"),
        "graph_is_project_of_the_day": project.get("isProjectOfTheDay"),
        "graph_is_project_we_love": project.get("isProjectWeLove"),
        "graph_creator_verified_identity": project.get("verifiedIdentity"),
        "graph_creator_name": creator.get("name"),
        "graph_creator_url": creator.get("url"),
        "graph_creator_has_image": creator.get("hasImage"),
        "graph_creator_image": creator.get("imageUrl"),
        "graph_creator_last_login": creator.get("lastLogin"),
        "graph_creator_biography": creator.get("biography"),
        "graph_creator_is_facebook_connected": creator.get("isFacebookConnected"),
        "graph_creator_allows_follows": creator.get("allowsFollows"),
        "graph_creator_backings_count": creator.get("backingsCount"),
        "graph_creator_location_name": creator_location.get("displayableName"),
        "graph_creator_location_state": creator_location.get("state"),
        "graph_creator_location_country": creator_location.get("countryName"),
        "graph_creator_launched_projects": launched_projects.get("totalCount"),
        "graph_creator_websites": creator.get("websites"),
        "graph_creator_collaborators": collaborators.get("edges"),
    }


def makeKickstarterRequest(project_id, timeout=30, base_headers=None):
    """Fetch one project's attributes from the Kickstarter Graph endpoint.

    The function loads the Kickstarter landing page to read the ``csrf-token``
    meta tag, then posts the ``Campaign`` GraphQL query to
    ``https://www.kickstarter.com/graph`` with that token in the
    ``x-csrf-token`` header.

    Parameters
    ----------
    project_id : int
        Kickstarter project id, sent as the ``pid`` query variable.
    timeout : float, optional
        Seconds to wait for each HTTP request before ``requests`` raises.
    base_headers : dict, optional
        Extra request headers to send with the Graph call. The dict is copied,
        never mutated. Earlier versions of this function wrote the token into a
        notebook-level ``headers`` dict that was not defined in this notebook;
        pass any such headers here instead.

    Returns
    -------
    dict or None
        Flat ``graph_*`` attribute dict, or ``None`` when the CSRF token cannot
        be found, the HTTP call fails, the body is not JSON, or the response
        contains no project.
    """
    # Build the headers locally so the function neither depends on nor mutates
    # notebook-level state.
    request_headers = dict(base_headers) if base_headers else {}

    # The context manager closes the session's connections after each call.
    with requests.Session() as session:
        # Retrieve csrf token from Kickstarter
        landing_page = session.get("https://www.kickstarter.com", timeout=timeout)
        soup = BeautifulSoup(landing_page.text, 'html.parser')
        token_tag = soup.find("meta", {"name": "csrf-token"})
        if token_tag is None:
            return None
        csrf_token = token_tag.get("content")
        if not csrf_token:
            return None
        request_headers["x-csrf-token"] = csrf_token

        # Perform Graph API call
        response = session.post(
            "https://www.kickstarter.com/graph",
            headers=request_headers,
            json={
                "operationName": "Campaign",
                # int() keeps numpy integer ids JSON-serialisable
                "variables": {"pid": int(project_id)},
                "query": _CAMPAIGN_QUERY,
            },
            timeout=timeout,
        )

        # An HTTP error status means there is nothing usable to parse.
        if not response.ok:
            return None

        try:
            result = response.json()
        except ValueError:
            # Body was not valid JSON (for example an HTML error page).
            return None

    if not isinstance(result, dict):
        return None

    # GraphQL errors can come back as {"data": null, "errors": [...]}.
    project = (result.get("data") or {}).get("project")
    if project is None:
        return None

    # Extract Attributes from JSON response
    return _extract_graph_attributes(project)

In [None]:
# Retrieve Graph API features for all Kickstarter projects.
# The slice bounds select which positional range of seed projects this run
# fetches; the results are added to whatever kickstarter_graph_df already holds.
graph_records = []
counter = 0
for project_id in kickstarter_berkeley_df.index[365999:366907]:
    counter += 1
    print("Project {}, {}".format(counter, project_id))
    clear_output(wait=True)

    # makeKickstarterRequest returns None when a project could not be retrieved
    record = makeKickstarterRequest(project_id)
    if record is not None:
        graph_records.append(record)

# Build the new rows once and concatenate a single time. Appending row by row
# copies the whole frame on every iteration, and DataFrame.append no longer
# exists in pandas 2.x.
if graph_records:
    new_rows = pd.DataFrame(graph_records, columns=kickstarter_graph_df.columns)
    kickstarter_graph_df = pd.concat([kickstarter_graph_df, new_rows], ignore_index=True)

Project 908, 1527584334


In [None]:
# Display (rows, columns) of the collected Graph API results
kickstarter_graph_df.shape

(282152, 47)

In [None]:
# Count missing values per column
kickstarter_graph_df.isna().sum()

graph_pid                                   0
graph_name                                  0
graph_blurb                                 0
graph_state                                 0
graph_percent_funded                        0
graph_backers_count                         0
graph_comments_count                        0
graph_url                                   0
graph_project_image                         0
graph_location_name                      1152
graph_location_state                     1233
graph_location_country                   1157
graph_story                                21
graph_risks                             48032
graph_environmental_commitments             0
graph_prelaunch_activated                   0
graph_is_watched                            0
graph_watches_count                         0
graph_created_at                            0
graph_launched_at                           0
graph_state_changed_at                      0
graph_canceled_at                 

In [None]:
# Preview the last rows of the collected results
kickstarter_graph_df.tail()

Unnamed: 0,graph_pid,graph_name,graph_blurb,graph_state,graph_percent_funded,graph_backers_count,graph_comments_count,graph_url,graph_project_image,graph_location_name,...,graph_creator_biography,graph_creator_is_facebook_connected,graph_creator_allows_follows,graph_creator_backings_count,graph_creator_location_name,graph_creator_location_state,graph_creator_location_country,graph_creator_launched_projects,graph_creator_websites,graph_creator_collaborators
282147,1503837604,Skiptracing.,"A short film, based on the life of a private d...",SUCCESSFUL,100,12,0,https://www.kickstarter.com/projects/111562635...,https://ksr-ugc.imgix.net/assets/023/484/341/e...,"Dublin, Ireland",...,A film student working on my final year film.,True,True,0,"Dublin, Ireland",Dublin,Ireland,1,[],[]
282148,1565332168,Rose Gold Bubbline,We will be creating rose gold bubbline pins!,SUCCESSFUL,101,18,0,https://www.kickstarter.com/projects/magicalgi...,https://ksr-ugc.imgix.net/assets/023/670/390/3...,"Orlando, FL",...,"A company for lovers of kpop, boba, anime, and...",False,True,0,"Tallahassee, FL",FL,United States,9,[],[]
282149,494403885,Jurassic Replicas: Pterosaur & Velociraptor Claw,Bring the museum HOME with a superior quality ...,FAILED,48,65,7,https://www.kickstarter.com/projects/jurassicr...,https://ksr-ugc.imgix.net/assets/023/659/683/d...,"Montreal, Canada",...,We are a 3D Design and Prototyping startup tha...,False,True,0,"Montreal, Canada",QC,Canada,9,[{'url': 'https://www.facebook.com/tyrannosaur...,[{'title': 'Collaborator'}]
282150,1231299952,Make 100 Fantasy Locations,"You Create the Locations, I'll Draw it on one ...",SUCCESSFUL,1020,325,67,https://www.kickstarter.com/projects/bannister...,https://ksr-ugc.imgix.net/assets/023/736/351/c...,"Adelaide, AU",...,Professional Gamesmaster since 1992\r\nCreator...,True,True,156,"Adelaide, AU",SA,Australia,1,[{'url': 'http://www.dungeondelverstwelve.com/...,[]
282151,1527584334,Mayo the Cat,A children's book about an imaginative cat na...,SUCCESSFUL,140,120,28,https://www.kickstarter.com/projects/mayotheca...,https://ksr-ugc.imgix.net/assets/023/665/452/8...,"New York, NY",...,"Pedro Rivera (commonly known as Frost), is bes...",True,True,0,"New York, NY",NY,United States,1,"[{'url': 'https://frostthegreat.net', 'domain'...",[]


#### Save Dataset:

In [None]:
# Write the collected Graph API results to CSV (the index is written as the first column)
kickstarter_graph_df.to_csv("02_Data Collection/Kickstarter_Graph_API_Berkeley.csv")

In [None]:
# Read the Graph API CSV back in, using its first column as the index,
# and display the resulting (rows, columns)
kickstarter_graph_df = pd.read_csv("02_Data Collection/Kickstarter_Graph_API_Berkeley.csv", index_col=0)
kickstarter_graph_df.shape

(282152, 47)