#### Setup:

In [None]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import json
from IPython.display import clear_output

In [None]:
# Show every column when a DataFrame is displayed (None disables the column limit)
pd.options.display.max_columns = None

#### Import Webrobots Seed Dataset:

In [None]:
# Load the cleaned Webrobots seed data; the first CSV column is used as the row index
kickstarter_webrobots_df = pd.read_csv(
    filepath_or_buffer="02_Data Collection/Kickstarter_Webrobots_Cleaned.csv",
    index_col=0,
)

#### Get the indices of the Webrobots projects that should be scraped:

In [None]:
# Add a "year" attribute holding the first four characters of each deadline value
kickstarter_webrobots_df["year"] = kickstarter_webrobots_df["deadline"].map(
    lambda deadline_value: deadline_value[:4]
)
# Number of projects per year, newest year first
kickstarter_webrobots_df["year"].value_counts().sort_index(ascending=False)

2020    21948
2019    28285
2018    30170
2017    34404
2016    36724
2015    51088
2014    42862
2013    27606
2012    26658
2011    15357
2010     5974
2009      613
Name: year, dtype: int64

In [None]:
# Keep only the 2019 and 2020 projects, because those are the ones missing from the Berkeley dataset
in_scrape_years = kickstarter_webrobots_df["year"].isin(["2019", "2020"])
scrape_indices = kickstarter_webrobots_df.loc[in_scrape_years].index
len(scrape_indices)

50233

#### Retrieve Additional Attributes via Kickstarter Graph API:

In [None]:
# Column layout of the Graph API result table, one entry per attribute
# returned by makeKickstarterRequest (order defines the column order).
graph_api_columns = [
    # campaign
    "graph_pid",
    "graph_name",
    "graph_blurb",
    "graph_state",
    "graph_percent_funded",
    "graph_backers_count",
    "graph_comments_count",
    "graph_updates_count",
    "graph_url",
    "graph_campaign_has_video",
    "graph_campaign_has_video_2",
    "graph_location_name",
    "graph_location_state",
    "graph_location_country",
    "graph_story",
    "graph_risks",
    "graph_environmental_commitments",
    "graph_created_at",
    "graph_launched_at",
    "graph_state_changed_at",
    "graph_deadline_at",
    "graph_subcategory",
    "graph_category",
    "graph_pledged_amount",
    "graph_pledged_currency",
    "graph_profile_blurb",
    "graph_profile_name",
    "graph_is_project_we_love",
    # creator
    "graph_creator_verified_identity",
    "graph_creator_name",
    "graph_creator_url",
    "graph_creator_has_image",
    "graph_creator_last_login",
    "graph_creator_biography",
    "graph_creator_is_facebook_connected",
    "graph_creator_allows_follows",
    "graph_creator_backings_count",
    "graph_creator_location_name",
    "graph_creator_location_state",
    "graph_creator_location_country",
    "graph_creator_launched_projects",
    "graph_creator_websites",
    "graph_creator_collaborators",
    # rewards
    "graph_number_of_rewards",
    "graph_rewards",
]

# Start with an empty table that already carries the full column layout
kickstarter_graph_df = pd.DataFrame(columns=graph_api_columns)
print(kickstarter_graph_df.shape)

(0, 45)


In [None]:
def _dig(node, *keys):
    """Follow ``keys`` through nested dicts, returning None as soon as a level is None."""
    for key in keys:
        if node is None:
            return None
        node = node[key]
    return node


def makeKickstarterRequest(project_id):
    """Fetch the additional Graph API attributes of a single Kickstarter project.

    A fresh session first loads the Kickstarter landing page to obtain a CSRF
    token, then posts the ``Campaign`` GraphQL query for ``project_id`` to
    ``https://www.kickstarter.com/graph``.

    Parameters
    ----------
    project_id : int
        Kickstarter project id (``pid``); it is converted with ``int()`` so
        that numpy integer scalars are JSON-serializable.

    Returns
    -------
    dict or None
        A dict with one entry per ``graph_*`` attribute, or None when no CSRF
        token is found on the landing page or the response contains no project.
        Nested attributes whose parent object is null in the response are None.

    Notes
    -----
    Network errors and non-JSON response bodies are not caught; the exception
    raised by ``requests`` propagates to the caller.
    """

    # Build query for retrieving additional features from Graph API
    query = """
    query Campaign($pid: Int!) {
      project(pid: $pid) {
        pid
        name
        description
        state
        percentFunded
        backersCount
        commentsCount
        timeline{
            totalCount
        }
        url
        isWatchable
        video{
            id
        }
        location{
            displayableName
            state
            countryName
        }
        story
        risks
        environmentalCommitments{
            commitmentCategory
            description
        }
        createdAt
        launchedAt
        stateChangedAt
        deadlineAt
        category{
            name
            parentCategory{
                name
            }
        }
        pledged{
            amount
            currency
        }
        profile{
            blurb
            name
        }
        isProjectWeLove
        verifiedIdentity
        creator{
            name
            url
            hasImage
            lastLogin
            biography
            isFacebookConnected
            allowsFollows
            backingsCount
            location{
                displayableName
                state
                countryName
            }
            launchedProjects{
                totalCount
            }
            websites{
                url
                domain
            }
        }
        collaborators{
            edges{
                title
            }
        }
        rewards{
            totalCount
            nodes{
                name
                description
                shippingPreference
                shippingSummary
                limit
                estimatedDeliveryOn
                startsAt
                endsAt
                backersCount
                items{
                    nodes{
                        name
                    }
                }
                amount{
                    currency
                    amount
                }
                convertedAmount{
                    currency
                    amount
                }
            }
        }
      }
    }"""

    # The session is closed when the block exits so connections are not leaked
    # across the many calls made by the scraping loop.
    with requests.session() as session:
        # Retrieve csrf token from Kickstarter
        landing_page = session.get("https://www.kickstarter.com")
        soup = BeautifulSoup(landing_page.text, 'html.parser')
        xcsrf = soup.find("meta", {"name": "csrf-token"})
        if xcsrf is None:
            return None

        # Start from notebook-level ``headers`` when such a dict exists, but do
        # not depend on it: on a fresh kernel the name is undefined.
        try:
            request_headers = dict(headers)
        except NameError:
            request_headers = {}
        request_headers["x-csrf-token"] = xcsrf["content"]

        # Perform Graph API call
        r = session.post(
            "https://www.kickstarter.com/graph",
            headers=request_headers,
            json={
                "operationName": "Campaign",
                "variables": {
                    "pid": int(project_id)
                },
                "query": query
            },
        )
        result = r.json()

    # A reply may carry "data": null or "project": null (e.g. on GraphQL
    # errors); both are treated as "nothing to extract".
    data = result.get("data") if isinstance(result, dict) else None
    project = data.get("project") if data else None
    if project is None:
        return None

    # Extract Attributes from JSON response. Top-level fields are read
    # directly; fields below a nullable object go through _dig so that a null
    # parent yields None instead of raising.
    attribute_dict = {
        "graph_pid" : project["pid"],
        "graph_name" : project["name"],
        "graph_blurb" : project["description"],
        "graph_state" : project["state"],
        "graph_percent_funded" : project["percentFunded"],
        "graph_backers_count" : project["backersCount"],
        "graph_comments_count" : project["commentsCount"],
        "graph_updates_count" : _dig(project, "timeline", "totalCount"),
        "graph_url" : project["url"],
        "graph_campaign_has_video" : project["isWatchable"],
        "graph_campaign_has_video_2" : _dig(project, "video", "id"),
        "graph_location_name" : _dig(project, "location", "displayableName"),
        "graph_location_state" : _dig(project, "location", "state"),
        "graph_location_country" : _dig(project, "location", "countryName"),
        "graph_story" : project["story"],
        "graph_risks" : project["risks"],
        "graph_environmental_commitments" : project["environmentalCommitments"],
        "graph_created_at" : project["createdAt"],
        "graph_launched_at" : project["launchedAt"],
        "graph_state_changed_at" : project["stateChangedAt"],
        "graph_deadline_at" : project["deadlineAt"],
        "graph_subcategory" : _dig(project, "category", "name"),
        "graph_category" : _dig(project, "category", "parentCategory", "name"),
        "graph_pledged_amount" : _dig(project, "pledged", "amount"),
        "graph_pledged_currency" : _dig(project, "pledged", "currency"),
        "graph_profile_blurb" : _dig(project, "profile", "blurb"),
        "graph_profile_name" : _dig(project, "profile", "name"),
        "graph_is_project_we_love" : project["isProjectWeLove"],
        "graph_creator_verified_identity" : project["verifiedIdentity"],
        "graph_creator_name" : _dig(project, "creator", "name"),
        "graph_creator_url" : _dig(project, "creator", "url"),
        "graph_creator_has_image" : _dig(project, "creator", "hasImage"),
        "graph_creator_last_login" : _dig(project, "creator", "lastLogin"),
        "graph_creator_biography" : _dig(project, "creator", "biography"),
        "graph_creator_is_facebook_connected" : _dig(project, "creator", "isFacebookConnected"),
        "graph_creator_allows_follows" : _dig(project, "creator", "allowsFollows"),
        "graph_creator_backings_count" : _dig(project, "creator", "backingsCount"),
        "graph_creator_location_name" : _dig(project, "creator", "location", "displayableName"),
        "graph_creator_location_state" : _dig(project, "creator", "location", "state"),
        "graph_creator_location_country" : _dig(project, "creator", "location", "countryName"),
        "graph_creator_launched_projects" : _dig(project, "creator", "launchedProjects", "totalCount"),
        "graph_creator_websites" : _dig(project, "creator", "websites"),
        "graph_creator_collaborators" : _dig(project, "collaborators", "edges"),
        "graph_number_of_rewards" : _dig(project, "rewards", "totalCount"),
        "graph_rewards" : _dig(project, "rewards", "nodes")
    }

    return attribute_dict

In [None]:
# Retrieve Graph API features for one batch of Kickstarter projects.
# The slice bounds select the positions within scrape_indices handled by this
# run; adjust them to scrape a different batch.
BATCH_START = 49713
BATCH_END = 50233

# Collect the per-project dicts in a list and build the frame once:
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row
# copies the whole table on every iteration.
scraped_records = []
counter = 0
for project_id in scrape_indices[BATCH_START:BATCH_END]:
    counter += 1
    print("Project {}, {}".format(counter, project_id))
    clear_output(wait=True)

    record = makeKickstarterRequest(project_id)
    # None means the project could not be retrieved; skip it
    if record is not None:
        scraped_records.append(record)

if scraped_records:
    batch_df = pd.DataFrame(scraped_records, columns=kickstarter_graph_df.columns)
    if kickstarter_graph_df.empty:
        # Nothing collected yet: the batch is the whole table
        kickstarter_graph_df = batch_df
    else:
        kickstarter_graph_df = pd.concat([kickstarter_graph_df, batch_df], ignore_index=True)

Project 520, 250452269


In [None]:
# (rows, columns) of kickstarter_graph_df
kickstarter_graph_df.shape

(36153, 45)

In [None]:
# Count the missing values in each column
kickstarter_graph_df.isnull().sum()

graph_pid                                  0
graph_name                                 0
graph_blurb                                0
graph_state                                0
graph_percent_funded                       0
graph_backers_count                        0
graph_comments_count                       0
graph_updates_count                        0
graph_url                                  0
graph_campaign_has_video                   0
graph_campaign_has_video_2             13579
graph_location_name                        0
graph_location_state                      10
graph_location_country                     1
graph_story                                0
graph_risks                               13
graph_environmental_commitments            0
graph_created_at                           0
graph_launched_at                          0
graph_state_changed_at                     0
graph_deadline_at                          0
graph_subcategory                          0
graph_cate

In [None]:
# Show the last five rows
kickstarter_graph_df.tail(n=5)

Unnamed: 0,graph_pid,graph_name,graph_blurb,graph_state,graph_percent_funded,graph_backers_count,graph_comments_count,graph_updates_count,graph_url,graph_campaign_has_video,graph_campaign_has_video_2,graph_location_name,graph_location_state,graph_location_country,graph_story,graph_risks,graph_environmental_commitments,graph_created_at,graph_launched_at,graph_state_changed_at,graph_deadline_at,graph_subcategory,graph_category,graph_pledged_amount,graph_pledged_currency,graph_profile_blurb,graph_profile_name,graph_is_project_we_love,graph_creator_verified_identity,graph_creator_name,graph_creator_url,graph_creator_has_image,graph_creator_last_login,graph_creator_biography,graph_creator_is_facebook_connected,graph_creator_allows_follows,graph_creator_backings_count,graph_creator_location_name,graph_creator_location_state,graph_creator_location_country,graph_creator_launched_projects,graph_creator_websites,graph_creator_collaborators,graph_number_of_rewards,graph_rewards
36148,575923225,SKAMH,"A surreal journey to an abandoned land, a forg...",FAILED,0,4,0,2,https://www.kickstarter.com/projects/497272959...,False,VmlkZW8tOTIzNjQ3,"Rome, Italy",Lazio,Italy,"<div class=""template asset"" contenteditable=""f...",Our project was born out of the uncontrollable...,[],1540979806,1542027153,1547211153,1547211153,Comic Books,Comics,4.0,EUR,,,False,Luca Pallotti,SKAMH,https://www.kickstarter.com/profile/497272959,True,1542795889,"Il nostro progetto nasce dalla voglia, ormai d...",False,True,0,"Rome, Italy",Lazio,Italy,1,"[{'url': 'https://skamh.godaddysites.com', 'do...",[],9,"[{'name': 'SUPPORT SKAMH', 'description': 'Tha..."
36149,2111656222,ULTRA comic book,A dark and gritty sci-fi comic created by Henr...,FAILED,11,7,1,2,https://www.kickstarter.com/projects/ultracomi...,False,,"Dallas, TX",TX,United States,<p>THE STORY: ULTRA is a dark sci-fi story abo...,A challenge for this project will be getting e...,[],1543688795,1543709933,1546301935,1546301933,Comic Books,Comics,111.0,USD,,,False,Matt Cox,Henry Cox,https://www.kickstarter.com/profile/ultracomic,True,1546636499,"I am an artist from Dallas, Texas. I mainly do...",False,True,0,"Dallas, TX",TX,United States,1,[],[],3,"[{'name': 'Digital copy!', 'description': 'Get..."
36150,828378725,Young Heroes Undefeated: Breaking barriers wit...,We create original comics for children with sp...,FAILED,15,27,1,2,https://www.kickstarter.com/projects/155534662...,False,VmlkZW8tOTMwNjA5,"New York, NY",NY,United States,"<div class=""template asset"" contenteditable=""f...",Although Kenneth Edwards and our humble but de...,[],1543756456,1544979604,1547571604,1547571604,Comic Books,Comics,461.0,USD,,,True,Nnamdi Mcclean,Robert Cornegy III,https://www.kickstarter.com/profile/1555346627,True,1598997731,"Writer, Emcee, and storyteller from Brooklyn,NY",True,True,2,"Brooklyn, NY",NY,United States,1,"[{'url': 'https://yhuf.org/', 'domain': 'yhuf....",[],10,"[{'name': 'Thank you + Digital poster', 'descr..."
36151,560316987,Rune's Gate Motion Comic,"Players of Rune's Gate, a MMORPG game that use...",FAILED,0,2,0,2,https://www.kickstarter.com/projects/ujucomics...,False,VmlkZW8tOTIyMTUz,"San Diego, CA",CA,United States,"<div class=""template asset"" contenteditable=""f...",Finding the right artists for the job! Artists...,[],1541276297,1541592805,1546776807,1546776805,Comic Books,Comics,38.0,USD,,,False,Andrew Burger,UjuComics,https://www.kickstarter.com/profile/ujucomics,True,1542338675,"My name is Andrew. I'm a 3d animator, comic wr...",False,True,0,"Louisville, KY",KY,United States,2,[{'url': 'https://www.youtube.com/channel/UChN...,[],6,"[{'name': 'Digital Copy', 'description': 'Rece..."
36152,250452269,"C.M.S Comics, Inc.",Creating the newest and hottest studio and com...,FAILED,5,4,1,4,https://www.kickstarter.com/projects/chazclark...,False,,"Dearborn, MI",MI,United States,"<div class=""template asset"" contenteditable=""f...",The biggest challenges are to compete with oth...,[],1529957675,1541120554,1546308154,1546308154,Comic Books,Comics,42.0,USD,,,False,Chaz Clark,Chaz Clark,https://www.kickstarter.com/profile/chazclark2,True,1603297830,Chaz Clark is a comic book rookie that made mo...,True,True,0,"Dearborn, MI",MI,United States,3,[],[],5,"[{'name': 'Thank You!', 'description': 'YOU! Y..."


#### Save Dataset:

In [None]:
# Write the Graph API attributes to disk; the row index is written as the first CSV column
kickstarter_graph_df.to_csv(
    path_or_buf="02_Data Collection/Kickstarter_Graph_API_Webrobots.csv",
)

In [None]:
# Read the saved file back (first CSV column becomes the index) and show its dimensions
kickstarter_graph_df = pd.read_csv(
    filepath_or_buffer="02_Data Collection/Kickstarter_Graph_API_Webrobots.csv",
    index_col=0,
)
kickstarter_graph_df.shape

(36153, 45)