# Scraping Sneaker Data from the Stock X website

### There are 3 components to this notebook:

1. Using the individual shoe keys to scrape all the shoe info 
2. Cleaning the scraped data.
3. Using the shoe name search/random shoe generator to get the price history for a specific shoe

In [33]:
import requests
from datetime import datetime
from datetime import timedelta
import pandas as pd
import numpy as np
import glob
from urllib.request import Request, urlopen
import time
import random
import io
import dropbox

In [2]:
# open a csv with all the sneakers (used for continuation after error)
# second_path = r'/Users/gabbyvinco/Desktop/sneakers_df.csv'
# round2 = pd.read_csv(second_path, index_col=None, header=0)

In [7]:
# import the query key csv from dropbox
frame = pd.read_csv('https://www.dropbox.com/s/90lnoot6o9usjtl/cumulative_list?dl=1')

In [9]:
# rename column to 'urlKeys' and rename dataframe
dataframe = frame.rename(columns={'0': 'urlKeys'})

In [237]:
# taking the urlKeys and inserting them in to th complete url with parameters
full_urls = []

for value in dataframe['urlKeys']:
    full = 'https://stockx.com/api/products/'+value+'?includes=market,360&currency=EUR&country=IT'
    #'https://stockx.com/api/products/'+value+
        # this part is determining which file/shoe you want to work with 
    #'?includes=market,360&currency=EUR&country=IT' 
        # this part is basically a string that is being passed down to your web browser to the web server
    # with this url we can send a GET request and fetch the information that is listed in the Inspect>Network>Preview
    full_urls.append(full)
    
# create a new column with the full urls    
dataframe['urlFull'] = full_urls

In [238]:
len(full_urls)

2471

In [243]:
# create a new list to hold the sneakers and all their information
info = []

# 1. Shoe Info (getting the basic information regarding the shoe)

Here we use the keys that we grabbed in the previous script. These keys when added to the query take you to the page for that specific shoe. From there we were able to make a request and gather information like the Stock X shoe identification number, brand, colorway, release date, retail price,official shoe name, volatility, change percentage, and marketed gender of the shoe.

In [244]:
# create a function that loops through the full url list, extracts the variables that we want
# and prints them out in a neat format

def get_shoe_info(url_list):
    for url in url_list:
        headers = {
            "accept-encoding": "gzip, deflate, br",
            "sec-fetch-mode": "cors",
            "sec=fetch-site": "same-origin",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36",
            "x-requested-with": "XMLHttpRequest"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        product = response.json()["Product"]
        try:
            id_num = product["id"]
        except:
            print("no id")
            id_num = 0
        try:
            brand = product["brand"]
        except:
            print("no brand")
            brand = 0
        try:
            colorway = product["colorway"]
        except:
            print("no colorway")
            colorway = 0
        try:    
            release_date = product["releaseDate"]
        except:
            print("no release date")
            release_date = 0
        try:
            retail_price = product["retailPrice"]
        except:
            print("no retail price")
            retail_price = 0
        try:
            shoe_name = product["shoe"]
        except:
            print("no shoe name")
            shoe_name = 0
        try:
            volatility = product["market"]["volatility"]
        except:
            print("no volatility")
            volatility = 0
        try:
            change_percentage = product["market"]["changePercentage"]
        except:
            print("no change percentage")
            change_percentage = 0
        try:
            gender = product["gender"]
        except:
            print("no gender")
            gender = 0

        info.append([id_num,
                     brand,
                     colorway,
                     release_date,
                     retail_price,
                     shoe_name,
                     volatility,
                     change_percentage,
                     gender])

        print("shoe info added")
        time.sleep(5)

    return 0
    

In [245]:
# (for personal use) brief explanation of __name__ == "__main__"
# the global variable = __name__ and the entry point = __main__ (or the name that you import the module by)
# so the code below this if statement will only run if the module == entry point to your program
# it allows the code in the module to be importable by other modules without executing the code beneath the block on import

if __name__ == "__main__":
    import sys
    sys.exit(get_shoe_info(full_urls))

shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
no release date
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
no release date
no retail price
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
shoe info added
no release date
shoe info added
shoe info added
shoe info added
no release date
shoe info added
shoe info added
shoe info added
shoe info added
no release date
shoe info added
shoe info added
shoe info added
no retail price
shoe info added
shoe info added
no release date
no retail price
shoe info added
no release date
shoe info added
shoe info added
shoe info added
shoe info added
no retail price
shoe info added
no retail price
shoe info added
no release date
shoe info added
shoe info added
no release date
shoe info added
shoe info added
shoe inf

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [247]:
# add info into sneakers_df
# sneaker_df = pd.DataFrame(info, columns = ["ID","Brand", "Colorway","ReleaseDate","RetailPrice","Name","Volatility","ChangePercentage","Gender"])

# re-ran the script because there was a timeout error so here we just added to the point at which it was left off
# round2 = round2.append(info)

In [248]:
# adding to dataset after timeout error
# round2.to_csv (r'/Users/gabbyvinco/Desktop/sneakers_df.csv', index = False, header=True)

# 2. Cleaning the scraped data

Here we are taking the data gathered about each shoe and cleaning it. There were some issues in the scrape with errors stopping the script and as a result of that there were some issues in offsets of the columns. This is all fixed in this section and prepares the data for futher analysis.

In [12]:
# import sneakers_df from dropbox
sneakers_df = pd.read_csv('https://www.dropbox.com/s/fridkrkb5ifydt0/sneakers_df.csv?dl=1')

In [14]:
sneakers_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,0.1,...,8.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1,Brand,ChangePercent,Colorway,Gender,ID,Name,ReleaseDate,RetailPrice,Volatility
0,,,,,,,,,,,...,,Yeezy,0.018072,Resin/Resin/Resin,men,bd225b2f-362f-4475-bd4a-375a23986dee,Yeezy Slide,2021-04-26,55.0,0.086295
1,,,,,,,,,,,...,,Yeezy,-0.061611,Core/Core/Core,men,a5d54b52-13e4-42fc-8821-71729bfb8b72,Yeezy Slide,2021-04-26,55.0,0.06072
2,,,,,,,,,,,...,,New Balance,-0.104348,White/Natural-Green,men,802d45e3-6eb2-4733-816f-87c7af67f249,New Balance 550,2021-04-16,130.0,0.133226
3,,,,,,,,,,,...,,New Balance,0.236104,White/Navy-Red,men,5f9d25f9-a884-43ee-bf2c-6a066ce66eee,New Balance 550,2021-04-16,130.0,0.209088
4,,,,,,,,,,,...,,Converse,0.028391,White/Bold Orange-Black,men,24453042-3907-4c9c-9a2c-ca7e2ce7ed1a,Converse Chuck Taylor All-Star,2018-10-08,130.0,0.084268


In [15]:
# align and rearrange the data by the offseted groups of rows

set0 = sneakers_df.loc[0:211,:]
set1 = sneakers_df.loc[212:366,:]
set2 = sneakers_df.loc[367:522,:]
set3 = sneakers_df.loc[523:677,:]
set4 = sneakers_df.loc[678:832,:]
set5 = sneakers_df.loc[833:987,:]
set6 = sneakers_df.loc[988:1141,:]
set7 = sneakers_df.loc[1142:1291,:]
set8 = sneakers_df.loc[1292:1446,:]
set9 = sneakers_df.loc[1447:1601,:]
set10 = sneakers_df.loc[1602:1756,:]
set11 = sneakers_df.loc[1757:1911,:]
set12 = sneakers_df.loc[1912:2066,:]
set13 = sneakers_df.loc[2067:2221,:]
set14 = sneakers_df.loc[2222:2376,:]
set15 = sneakers_df.loc[2377:2532,:]
set16 = sneakers_df.loc[2533:,:]

In [16]:
# create a list of all the dataframes to then loop through 
df_list = [set1,set2,set3,set4,set5,set6,set7,set8,set9,set10,set11,set12,set13,set14,set15,set16]

In [17]:
# create a list to add to after removing all NaN values
dropped_list = []

In [18]:
# get rid of all NaN rows that were created by some issue in the scraping code
for s in df_list:
    s = s.dropna(axis='columns', how ='all')
    dropped_list.append(s)

In [19]:
dropped_list[0].head()

Unnamed: 0,0.1.1.1.1.1.1.1.1.1.1.1.1.1.1,1.1.1.1.1.1.1.1.1.1.1.1.1.1.1,2.1.1.1.1.1.1.1.1.1.1.1.1.1.1,3.1.1.1.1.1.1.1.1.1.1.1.1.1.1,4.1.1.1.1.1.1.1.1.1.1.1.1.1.1,5.1.1.1.1.1.1.1.1.1.1.1.1.1.1,6.1.1.1.1.1.1.1.1.1.1.1.1.1.1,7.1.1.1.1.1.1.1.1.1.1.1.1.1.1,8.1.1.1.1.1.1.1.1.1.1.1.1.1.1
212,daf1086a-2228-4d34-9dca-6c647058aab2,New Balance,Sea Salt/Black,0,90.0,New Balance 327,0.231971,0.613924,men
213,0c388564-75e0-4465-a60f-f6a293799646,New Balance,Beige/Denim-Orange,2020-04-24,200.0,New Balance 1300,0.156082,-0.035003,men
214,93a40e26-ad4c-4194-8795-3553ce556213,Converse,Bright Blue,2020-04-02,150.0,Converse Chuck Taylor All-Star,0.193839,0.319213,men
215,61ce44bf-1a79-4dc0-bf91-21d008501b14,Reebok,Chalk/Matte Gold-Core Black,2020-10-11,100.0,Reebok Cardi Club C,0.133499,-0.042169,women
216,76869f78-2ee0-4d93-befe-30f2178a7706,Reebok,White/Fluid Blue-Reebok Ice,2020-06-18,120.0,Reebok Question Low,0.099964,0.391667,men


In [20]:
# create a new cumulative df
sneaker_info_df = pd.DataFrame()

In [21]:
# create an empty list to append to 
sets_list = []

In [22]:
# change the column names for all of these randomly named columns
for df in dropped_list:
    df = df.rename(columns={df.columns[0]: 'ID',
                            df.columns[1]: 'Brand',
                            df.columns[2]: 'Colorway',
                            df.columns[3]: 'ReleaseDate',
                            df.columns[4]: 'RetailPrice',
                            df.columns[5]: 'Name',
                            df.columns[6]: 'Volatility',
                            df.columns[7]: 'ChangePercent',
                            df.columns[8]: 'Gender'})
    sets_list.append(df)
    

In [23]:
# take the first set which had the correct column names and reorder to fit the rest of the sets
cols = list(set0.columns.values)
col_correct = list(sneaker_info_df.columns.values)
set0 = set0[['ID','Brand','Colorway','ReleaseDate','RetailPrice','Name','Volatility','ChangePercent','Gender']]

In [24]:
# add in set 0 to complete the dataframe
sets_list.append(set0)
sneaker_info_df = pd.concat(sets_list)

In [25]:
# sort by ascending index
sneaker_info_df = sneaker_info_df.sort_index(ascending=True)

In [26]:
sneaker_info_df.head()

Unnamed: 0,ID,Brand,Colorway,ReleaseDate,RetailPrice,Name,Volatility,ChangePercent,Gender
0,bd225b2f-362f-4475-bd4a-375a23986dee,Yeezy,Resin/Resin/Resin,2021-04-26,55.0,Yeezy Slide,0.086295,0.018072,men
1,a5d54b52-13e4-42fc-8821-71729bfb8b72,Yeezy,Core/Core/Core,2021-04-26,55.0,Yeezy Slide,0.06072,-0.061611,men
2,802d45e3-6eb2-4733-816f-87c7af67f249,New Balance,White/Natural-Green,2021-04-16,130.0,New Balance 550,0.133226,-0.104348,men
3,5f9d25f9-a884-43ee-bf2c-6a066ce66eee,New Balance,White/Navy-Red,2021-04-16,130.0,New Balance 550,0.209088,0.236104,men
4,24453042-3907-4c9c-9a2c-ca7e2ce7ed1a,Converse,White/Bold Orange-Black,2018-10-08,130.0,Converse Chuck Taylor All-Star,0.084268,0.028391,men


In [27]:
# drop any duplicates 
sneaker_info_df.drop_duplicates(subset ="ID",keep = False, inplace = True)

In [28]:
# remove brand from the sneaker name so that it is only the official style name
sneaker_info_df['Name'] = sneaker_info_df['Name'].str.replace(r'adidas ', '')
sneaker_info_df['Name'] = sneaker_info_df['Name'].str.replace(r'Nike ', '')

sneaker_info_df.head()

Unnamed: 0,ID,Brand,Colorway,ReleaseDate,RetailPrice,Name,Volatility,ChangePercent,Gender
110,7154866b-6f46-4525-b7d3-f7f91ba78fab,New Balance,Munsell White/Holly Green,2021-02-27,130.0,New Balance 327,0.093883,0.28355,men
111,de62db29-a612-4824-bfa2-24a757233c17,New Balance,Yellow/White-Black,0,150.0,New Balance Vision Racer,0.151835,0.585,men
112,545efd57-816c-4cd5-8d4a-18deb4af035e,New Balance,White/Grey/Black,0,90.0,New Balance 327,0.697667,-0.044444,men
113,9dd76318-1e22-42fa-a8cf-2228a8a570c1,Crocs,Black,2019-12-10,60.0,Crocs Duet Max Clog,0.545088,-0.179856,men
114,85bf50a1-c610-4d9a-9993-d82a8435a296,Reebok,Dynamic Pink/Dynamic Pink/Clear,2021-02-05,100.0,Reebok Club C Cardi,0.224116,0.84,women


In [29]:
# value counts to see which are the most popular style of shoe
sneaker_info_df["Name"].value_counts().head(630)

Jordan 1 Mid                      63
Yeezy Boost 350 V2                62
Balenciaga Triple S               55
Jordan 1 Retro High               50
Converse Chuck Taylor All-Star    43
                                  ..
Tennis Hu V2                       1
Kobe 4 Protro                      1
Burberry Slides                    1
Balenciaga Speed Lace Up           1
Jordan 1 High Switch               1
Name: Name, Length: 630, dtype: int64

In [31]:
# remove duplicates and only have unique values
unique_sneaker_models = sneaker_info_df["Name"].unique()
# unique_sneaker_models

In [34]:
# test the random sample of 30 sneakers
sneakers_to_search_30 = np.random.choice(unique_sneaker_models,30)

In [35]:
print(sneakers_to_search_30)

['Gucci "Original Gucci" Slide' 'Air VaporMax 2020 Flyknit' 'Top Ten Hi'
 'Air Jordan 1 Mid' 'Dunk High SE' 'Lebron 18' 'Jordan 1 Retro'
 'Jordan XXXV' 'Gucci Falacer' 'New Balance R770' 'Lanvin Leather Curb'
 'Jordan 1 High Switch' 'Dunk Low Retro' 'LouisVuitton Run Away Sneaker'
 'Vans Acer Ni SP' 'YZY QNTM' 'Jordan 1 Retro' 'Fear of God 101 Lace Up'
 'New Balance Vision Racer' 'Air Max Plus' 'New Balance 574'
 'Gucci Ace GG High Top' 'Yung-1' 'React Element 55' 'Saucony Grid 8000'
 'Jordan 1 Zoom CMFT' 'Reebok Question Mid' 'Yeezy 450' 'Jordan XXXV'
 'ZX 10000']


In [36]:
# save the csv to the project's dropbox folder

# running the notebook that contains the twitter api keys to pass the variables in
%run ./dropbox_key.ipynb
# connect to the group project dropbox
DBX = dropbox.Dropbox(dropbox_token)
# save df as csv file
data = sneaker_info_df.to_csv(index=False) # The index parameter is optional
# upload to the project's dropbox folder
with io.BytesIO(data.encode()) as stream:
    stream.seek(0)
    DBX.files_upload(stream.read(), "/sneaker_info_data", mode=dropbox.files.WriteMode.overwrite)

# 3. Shoe Name Seach to generate a url for price history

This portion was created with the intention to use with our time-series analysis. Here we have two methods as to which we can gather our price history info. 
1. By using the shoe search function where we can enter in the shoe name we are searching for, then select the colorway we want, and finally it will provide us with the "sku_id" which is the Stock X shoe identification number. Using this number we can continue and request the information from the price history plot.
2. We use the random shoe generator which will provide us with "sku_id" for a random shoe on the Stock X website.

In [39]:
# import the cleaned sneaker dataset from dropbox
sneaker_info_df = pd.read_csv('https://www.dropbox.com/s/6mj2ntm6meddnqh/sneaker_info_data?dl=1')

In [40]:
# select shoe by name
# make an input where the shoe name can be entered
print("What is the name of the shoe you want to search? ")
shoe_search = input()

# make input lowercase to eliminate any variation in sizing
shoe_search = shoe_search.lower()

What is the name of the shoe you want to search? 
Yeezy 700 V3


In [41]:
# create a list with all the sneaker names in the dataset ensure that they are all 
# lowercased so we can compare it with the input
shoe_names = sneaker_info_df["Name"].to_list()
lower_names = []

for i in shoe_names:
    i = i.lower()
    lower_names.append(i)

# unique brands    
unique_names = set(shoe_names)
# print(unique_names)

#unique brands by lowercase
unique_lower_names = set(lower_names)
# print(unique_lower_names)

# capitalize the first letter of the searched term from the input
shoe_search_capitalized = shoe_search.title()

In [42]:
if shoe_search in unique_lower_names:
    print("We have found that shoe in our data.")
    
    # count of how many types of that name shoe there are
    count_of_name = sum(shoe_search in s for s in lower_names)
    print(f'There are {count_of_name} different colorways of that shoe.')

    # take the shoes of that name and display the colorways to choose from
    sneaker_search_responses = sneaker_info_df.set_index(["Name"])
    print("     ")
    colorway_options = sneaker_info_df[sneaker_info_df["Name"] == shoe_search_capitalized]
    pd.set_option('display.max_rows', colorway_options.shape[0]+1)
    print(colorway_options[["Colorway","Gender"]])
    print("     ")
    
    # choose the colorway
    print("Please specify which colorway you would like: ")
    color_search = input()
    print("     ")
    
    # take the selected colorway and output the ID number
    selected_shoe = colorway_options[colorway_options["Colorway"] == color_search]
    sku_id = selected_shoe["ID"]
    print("ID has been grabbed, proceed to make the request")
#     print(id_num)

else:
    # error message if the shoe isnt in the dataset
    print("We're sorry, the brand you entered wasn't found in our data.")

We have found that shoe in our data.
There are 9 different colorways of that shoe.
     
                           Colorway     Gender
1721        Azareth/Azareth/Azareth        men
1722              Alvah/Alvah/Alvah        men
1726              Azael/Azael/Azael        men
1728        Kyanite/Kyanite/Kyanite        men
1733        Eremial/Eremial/Eremial        men
1768  Safflower/Safflower/Safflower        men
2070        Azareth/Azareth/Azareth    toddler
2112  Safflower/Safflower/Safflower    toddler
2167        Azareth/Azareth/Azareth  preschool
     
Please specify which colorway you would like: 
Alvah/Alvah/Alvah
     
ID has been grabbed, proceed to make the request


In [43]:
# to check if the id was grabbed
print(sku_id)

1722    c24cd262-21b0-447a-bbd6-6f1ce15be54c
Name: ID, dtype: object


In [44]:
# take the ID number from previous cell and insert it in the url
search_url = "https://stockx.com/api/products/" + sku_id + "/chart"
search_url = search_url.values[0]
# display the entire id not just the first few characters
pd.options.display.max_colwidth = 150

In [45]:
# check the link generated
print(search_url)

https://stockx.com/api/products/c24cd262-21b0-447a-bbd6-6f1ce15be54c/chart


# Random Shoe Generator to create url for price history

This part uses a random number generator to then select a shoe out of the dataset and then provide a url for which we can then access the price history.

In [46]:
# check the length of the dataset
len(sneaker_info_df)

2206

In [47]:
# create a random number generator to then select a shoe out of the dataset
n = random.randint(0,2205)
random_sneaker = sneaker_info_df.loc[n,:]
print(random_sneaker[["Name","Colorway"]])
random_id = random_sneaker["ID"]
# create the url using the id from the random generator
random_url = "https://stockx.com/api/products/" + random_id + "/chart"


Name        Gucci Flashtrek
Colorway              Black
Name: 1396, dtype: object


In [48]:
# check the link
print(random_url)

https://stockx.com/api/products/87fff3cf-69d5-46de-8432-a062be9d5547/chart


# Price History Function (Getting the data from the interactive plots)

This portion now uses the url from either the shoe search fucntion or the random shoe generator to access the price history plot on the Stock X website. This information is visible on the website in an interactive plot, however we were able to make a request and gather all of the price history information.

In [49]:
# allow us to control the days for which we want the price history information
day_ago = datetime.today() - timedelta(days=89)
day_ago = day_ago.strftime('%Y-%m-%d')
day_choose = datetime.today().strftime('%Y-%m-%d')

In [57]:
y = []

In [58]:
def get_price_history(url_with_id, day_get, day_ini):

    params = {
        "start_date": day_ini,
        "end_date": day_get,
        "intervals": "100",
        "format": "highstock",
        "currency": "EUR",
        "country": "IT"
    }

    headers = {
        "accept-encoding": "gzip, deflate",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36",
        "x-requested-with": "XMLHttpRequest"
    }

    response = requests.get(url_with_id, params=params, headers=headers)
    response.raise_for_status()
    price_history = response.json()["series"][0]["data"]
    
    for timestamp, price in response.json()["series"][0]["data"]:
        date = datetime.utcfromtimestamp(int(timestamp) // 1000)
        print(f"[{date}]: €{price}")
        # append to list so data frame can be created
        y.append([date, price])
    
    return 0
    

## Don't forget to pass in the url variable (either search_url OR random_url)
You must use one or the other, but not both.

In [59]:
if __name__ == "__main__":
    import sys
    sys.exit(get_price_history(search_url, day_choose, day_ago))

[2021-03-12 12:15:11]: €324
[2021-03-13 09:36:47]: €320
[2021-03-14 06:58:23]: €329
[2021-03-15 04:19:59]: €320
[2021-03-16 01:41:35]: €329
[2021-03-16 23:03:11]: €329
[2021-03-17 20:24:47]: €322
[2021-03-18 17:46:23]: €325
[2021-03-19 15:07:59]: €334
[2021-03-20 12:29:35]: €334
[2021-03-21 09:51:11]: €326
[2021-03-22 07:12:47]: €341
[2021-03-23 04:34:23]: €334
[2021-03-24 01:55:59]: €331
[2021-03-24 23:17:35]: €342
[2021-03-25 20:39:11]: €338
[2021-03-26 18:00:47]: €330
[2021-03-27 15:22:23]: €334
[2021-03-28 12:43:59]: €326
[2021-03-29 10:05:35]: €338
[2021-03-30 07:27:11]: €336
[2021-03-31 04:48:47]: €343
[2021-04-01 02:10:23]: €346
[2021-04-01 23:31:59]: €344
[2021-04-02 20:53:35]: €347
[2021-04-03 18:15:11]: €350
[2021-04-04 15:36:47]: €338
[2021-04-05 12:58:23]: €347
[2021-04-06 10:19:59]: €343
[2021-04-07 07:41:35]: €347
[2021-04-08 05:03:11]: €353
[2021-04-09 02:24:47]: €352
[2021-04-09 23:46:23]: €351
[2021-04-10 21:07:59]: €339
[2021-04-11 18:29:35]: €355
[2021-04-12 15:51:11

SystemExit: 0

In [61]:
# create a dataframe from the y list
price_history_df = pd.DataFrame(y)

In [62]:
price_history_df

Unnamed: 0,0,1
0,2021-03-12 12:15:11,324
1,2021-03-13 09:36:47,320
2,2021-03-14 06:58:23,329
3,2021-03-15 04:19:59,320
4,2021-03-16 01:41:35,329
...,...,...
95,2021-06-05 01:27:11,389
96,2021-06-05 22:48:47,347
97,2021-06-06 20:10:23,371
98,2021-06-07 17:31:59,357


In [63]:
# save the dataframe to the dropbox file

# running the notebook that contains the twitter api keys to pass the variables in
%run ./dropbox_key.ipynb
# connect to the group project dropbox
DBX = dropbox.Dropbox(dropbox_token)
# save df as csv file
data = price_history_df.to_csv(index=False) # The index parameter is optional
# upload to the project's dropbox folder
with io.BytesIO(data.encode()) as stream:
    stream.seek(0)
    DBX.files_upload(stream.read(), "/price_history_df.csv", mode=dropbox.files.WriteMode.overwrite)