In [1]:
import pandas as pd 
import requests
from PythonScripts.keys import KEY_ONE
from PIL import Image
import os
import PythonScripts.resize as rs
import PythonScripts.data_clean as dc
from pathlib import Path
# import theme df, join merged_df_three with theme to get star wars theme. 
#add code to not include first columns

# Windows specific read
# inv_df = pd.read_csv('../CSVs/inventories.csv', usecols=['id','set_num'])
# inv_parts_df = pd.read_csv('../CSVs/inventory_parts.csv', usecols=['inventory_id', 'part_num', 'color_id', 'quantity'])
# # set_df = pd.read_csv('../CSVs/sets.csv', usecols=['set_num', 'name', 'year', 'theme_id', 'num_parts'])
# parts_df = pd.read_csv('../CSVs/parts.csv', usecols=['part_num', 'name'])

# # print(inv_parts_df.shape)
# # print(inv_df.shape)
# # print(set_df.shape)
# print(parts_df.shape)



In [2]:
# Function to generate path for reading the csvs from CSV folder on multi-platform
    

sets_path = dc.generate_csv_path('sets.csv')
set_df = pd.read_csv(sets_path, usecols=['set_num', 'name', 'year', 'theme_id', 'num_parts'])

inv_parts_path = dc.generate_csv_path('inventory_parts.csv')
inv_parts_df = pd.read_csv(inv_parts_path, usecols=['inventory_id', 'part_num', 'color_id', 'quantity'])

inv_path = dc.generate_csv_path('inventories.csv')
inv_df = pd.read_csv(inv_path, usecols=['id','set_num'])

parts_path = dc.generate_csv_path('parts.csv')
parts_df = pd.read_csv(parts_path, usecols=['part_num', 'name'])


In [3]:
# Rename columns for easier joining
inv_rename_dict = {'id' : 'inventory_id',
                   'set_num' : 'set_num'}
inv_df.rename(columns=inv_rename_dict, inplace=True)

parts_rename_dict = {'part_num' : 'part_num',
                     'name' : 'part_name'}
parts_df.rename(columns=parts_rename_dict, inplace=True)

In [4]:
# Merge all Dataframes into one larger dataframe with all data points
print(inv_df.shape)

all_merged_df = inv_df.merge(inv_parts_df, how='inner', left_on='inventory_id', right_on='inventory_id')
print(all_merged_df.shape)

all_merged_df = all_merged_df.merge(set_df, how='left', left_on='set_num', right_on='set_num')
print(all_merged_df.shape)

merged_df = all_merged_df.merge(parts_df, how='left', left_on='part_num', right_on='part_num')
print(all_merged_df.shape)


(33221, 2)
(1041633, 5)
(1041633, 9)
(1041633, 9)


In [5]:
# Remove all non-Star Wars themes from the Dataframe
print(all_merged_df.shape)

sw_theme_ids = [18, 158, 171, 209, 261]
all_merged_df = all_merged_df[all_merged_df['theme_id'].isin(sw_theme_ids)]

all_merged_df.shape


(1041633, 9)


(86526, 9)

In [6]:
# find set number with the most pieces
piece_count = all_merged_df.groupby(['set_num'])['quantity'].sum()

max_count = piece_count.idxmax()
print(max_count)


75192-1


In [7]:
# drop all rows but set with most pieces
mask = all_merged_df[all_merged_df['set_num'] != max_count].index
all_merged_df.drop(mask, inplace=True)


In [8]:
# Make a new df with just top 10 parts with highest quantity. Save as CSV to use in Tableau dashboard 
top_ten_parts = all_merged_df['quantity'].nlargest(n=10, keep='first')
top_ten_df = all_merged_df[all_merged_df['quantity'].isin(top_ten_parts)]
top_ten_df.reset_index(drop=True, inplace=True)



In [9]:
# Write top_ten_df to csv on the local drive for use in Tableau
file_path = dc.write_csv('top_ten_parts.csv')
top_ten_df.to_csv(file_path)

In [10]:
# initialize new lists for color id and part number of top 10 piece quantities. Zip to tuple to lock in for API calls
# API call to find part specs for each part/color combination and save the image URL to a list
# add the URLs to the top ten Dataframe
part_num_list =[]
part_color_list = []

for item in top_ten_df['part_num']:
    part_num_list.append(item)
for item in top_ten_df['color_id']:
    part_color_list.append(str(item))
num_color_zip = zip(part_num_list,part_color_list)

url_list = []
for num, color in num_color_zip:
    response = requests.get(f'https://rebrickable.com/api/v3/lego/parts/{num}/colors/{color}?key={KEY_ONE}')
    data = response.json()
    url_list.append(str(data['part_img_url']))

url_df = pd.DataFrame({'part_num' : part_num_list,
                       'URL' : url_list})

url_df['part_num'] = url_df['part_num'].astype(str)
url_df['URL'] = url_df['URL'].astype(str)

top_ten_df = top_ten_df.merge(url_df, how='left', left_on='part_num', right_on='part_num')

file_path = dc.write_csv('url_list.csv')
top_ten_df.to_csv(file_path)
    

In [11]:
# Save images and resize for Tableau
# rs.write_image(url_list)
# rs.resize_files('../JPGs/')
