In [1]:
import json

import pandas as pd
import requests
from keys import KEY_TWO


In [2]:
# First API call: fetch the list of LEGO themes from Brickset and load it into a DataFrame.
# The API key is passed through `params` so requests URL-encodes it, instead of
# splicing it into the URL by hand.
themes = requests.get(
    'https://brickset.com/api/v3.asmx/getThemes',
    params={'apiKey': KEY_TWO},
    timeout=30,  # do not hang the notebook forever if the service is unreachable
)
# Fail here on an HTTP error rather than later, while parsing the body.
themes.raise_for_status()
data = themes.json()
# One row per entry of the 'themes' list in the response.
df = pd.json_normalize(data, 'themes')


In [3]:
# Discard themes that started before 1999, have fewer than 50 sets, ended before 2022,
# or are the minifigure / miscellaneous catch-all themes.
started_before_1999 = df['yearFrom'].lt(1999)
fewer_than_50_sets = df['setCount'].lt(50)
ended_before_2022 = df['yearTo'].lt(2022)
excluded_by_name = df['theme'].isin(['Collectable Minifigures', 'Miscellaneous'])
# `mask` holds the index labels of the rows to remove.
mask = df.loc[
    started_before_1999 | fewer_than_50_sets | ended_before_2022 | excluded_by_name
].index
df.drop(mask, inplace=True)
df.shape

(17, 5)

In [4]:
# Pick three themes at random for the second API call, then join them into a single
# comma-separated string to use as the request parameter.
theme_list = list(df['theme'].sample(3))
param_string = ", ".join(theme_list)
param_string

'Disney, Speed Champions, Ideas'

In [5]:
# Second API call: fetch the sets belonging to the sampled themes and load them into a DataFrame.
# NOTE(review): confirm that a pageSize of 2500 is within the limit the API allows.
parameters = {'theme': param_string, 'pageSize': 2500}
set_list = requests.get(
    'https://brickset.com/api/v3.asmx/getSets',
    params={
        'apiKey': KEY_TWO,
        'userHash': '',
        # Serialise the search parameters as real JSON. Interpolating the dict into the
        # URL sent its Python repr (single-quoted keys and values), which is not valid JSON.
        'params': json.dumps(parameters),
    },
    timeout=30,  # do not hang the notebook forever if the service is unreachable
)
# Fail here on an HTTP error rather than later, while parsing the body.
set_list.raise_for_status()
set_data = set_list.json()
# One row per entry of the 'sets' list; nested objects become dotted column names.
set_df = pd.json_normalize(set_data, 'sets')
set_df.shape


(241, 44)

In [6]:
# Columns that are not relevant to the analysis and are removed from the DataFrame.
drop_list = [
            'numberVariant',
            'released',
            'category',
            'bricksetURL',
            'reviewCount',
            'packagingType',
            'availability',
            'instructionsCount',
            'additionalImageCount',
            'lastUpdated',
            'image.thumbnailURL',
            'image.imageURL',
            'collections.ownedBy',
            'collections.wantedBy',
            'dimensions.height',
            'dimensions.width',
            'dimensions.depth',
            'LEGOCom.US.retailPrice',
            'LEGOCom.US.dateFirstAvailable',
            'LEGOCom.US.dateLastAvailable',
            'LEGOCom.UK.retailPrice',
            'LEGOCom.UK.dateFirstAvailable',
            'LEGOCom.UK.dateLastAvailable',
            'LEGOCom.CA.retailPrice',
            'LEGOCom.CA.dateFirstAvailable',
            'LEGOCom.CA.dateLastAvailable',
            'dimensions.weight',
            'barcode.EAN',
            'barcode.UPC',
            'minifigs',
            'LEGOCom.DE.retailPrice',
            'LEGOCom.DE.dateFirstAvailable',
            'LEGOCom.DE.dateLastAvailable'
            ]
# errors='ignore' skips any listed column that is not present. The themes are sampled at
# random, so a column may be missing when no returned set carries that field, and the
# columns are already gone when this cell is re-run; either case used to raise KeyError.
set_df.drop(columns=drop_list, inplace=True, errors='ignore')
set_df

Unnamed: 0,setID,number,name,year,theme,themeGroup,subtheme,pieces,rating,ageRange.min,ageRange.max
0,31423,10772,Mickey Mouse's Propeller Plane,2021,Disney,Licensed,Mickey and Friends,59.0,3.7,4.0,
1,31424,10773,Minnie Mouse's Ice Cream Shop,2021,Disney,Licensed,Mickey and Friends,100.0,0.0,4.0,
2,31425,10774,Mickey Mouse & Minnie Mouse's Space Rocket,2021,Disney,Licensed,Mickey and Friends,88.0,0.0,4.0,
3,31641,10775,Mickey Mouse & Donald Duck's Farm,2021,Disney,Licensed,Mickey and Friends,118.0,0.0,4.0,
4,31426,10776,Mickey & Friends Fire Truck & Station,2021,Disney,Licensed,Mickey and Friends,144.0,0.0,4.0,
...,...,...,...,...,...,...,...,...,...,...,...
236,32519,BELLE,Belle's Castle,2016,Disney,Licensed,Beauty and the Beast,,0.0,,
237,29813,OLAF,Olaf Box,2019,Disney,Licensed,Frozen II,,0.0,,
238,26645,TRUCOGSWORTH,Cogsworth,2016,Disney,Licensed,Beauty and the Beast,47.0,0.0,,
239,26644,TRULUMIERE,Lumiere,2016,Disney,Licensed,Beauty and the Beast,23.0,0.0,,


In [7]:
# Remove every set whose rating is 0, which is treated here as "no rating".
# `mask_two` holds the index labels of those rows.
mask_two = set_df.loc[set_df['rating'].eq(0)].index
set_df.drop(index=mask_two, inplace=True)
set_df.shape

(161, 11)

In [8]:
# Report whether any set is missing its piece count and, if so, drop those rows.
pieces_null = set_df['pieces'].isna().any()
print(pieces_null)
# Test the flag's truthiness directly instead of comparing it with `== True`.
if pieces_null:
    set_df.dropna(subset=['pieces'], inplace=True)
set_df.shape

False


(161, 11)

In [9]:
# Cast the piece count and both age-range bounds from float to pandas' nullable
# integer dtype (Int64).
for column in ('pieces', 'ageRange.min', 'ageRange.max'):
    set_df[column] = set_df[column].astype('Int64')

In [10]:
# Map the API's column names to human-readable headings for the exported table.
rename_dict = {
    'setID': 'Set ID',
    'number': 'Set Number',
    'name': 'Set Name',
    'year': 'Release Year',
    'theme': 'Theme',
    'themeGroup': 'Theme Group',
    'subtheme': 'Subtheme',
    'pieces': 'Number of Pieces',
    'rating': 'Brickset Rating',
    'ageRange.min': 'Min Age Range',
    'ageRange.max': 'Max Age Range',
}

set_df.rename(columns=rename_dict, inplace=True)
set_df

Unnamed: 0,Set ID,Set Number,Set Name,Release Year,Theme,Theme Group,Subtheme,Number of Pieces,Brickset Rating,Min Age Range,Max Age Range
0,31423,10772,Mickey Mouse's Propeller Plane,2021,Disney,Licensed,Mickey and Friends,59,3.7,4,
10,9285,21101,Hayabusa,2012,Ideas,Miscellaneous,Licensed,369,4.2,12,
11,9342,21102,Minecraft Micro World: The Forest,2012,Ideas,Miscellaneous,Licensed,480,3.9,10,
12,10240,21103,The DeLorean Time Machine,2013,Ideas,Miscellaneous,Licensed,401,4.0,10,
13,22830,21104,NASA Mars Science Laboratory Curiosity Rover,2014,Ideas,Miscellaneous,NASA,295,4.2,10,
...,...,...,...,...,...,...,...,...,...,...,...
211,32120,76908,Lamborghini Countach,2022,Speed Champions,Licensed,Lamborghini,262,4.6,,
212,32121,76909,Mercedes-AMG F1 W12 E Performance & Mercedes-A...,2022,Speed Champions,Licensed,Mercedes,564,4.3,,
213,32122,76910,Aston Martin Valkyrie AMR Pro and Aston Martin...,2022,Speed Champions,Licensed,Aston Martin,592,4.2,,
216,30857,92176,NASA Apollo Saturn V,2020,Ideas,Miscellaneous,NASA,1969,4.6,14,


In [11]:
# Keep a random sample of at most 100 sets. The size is capped at the number of rows
# available because sample() raises ValueError when asked for more rows than exist
# (sampling without replacement), which can happen depending on the themes drawn earlier.
set_df = set_df.sample(n=min(100, len(set_df)))

In [12]:
# Export the final table to a CSV file so it can be visualised in Tableau.
set_df.to_csv(path_or_buf='./set_list.csv')