In [133]:
#Dependencies
import pymongo
import pandas as pd
import numpy as np

# Extract CSVs into DataFrames

In [134]:
#Read index file
index_file = "resources/index.csv"
index_df = pd.read_csv(index_file)
index_df.head()

Unnamed: 0.1,Unnamed: 0,path,class_id,train-valid
0,0,marvel/0001/001.jpg,1,train
1,1,marvel/0001/002.jpg,1,valid
2,2,marvel/0001/003.jpg,1,train
3,3,marvel/0001/004.jpg,1,train
4,4,marvel/0001/005.jpg,1,train


In [135]:
#Read metadata file
metadata_file = "resources/metadata.csv"
metadata_df = pd.read_csv(metadata_file)
metadata_df.head()

Unnamed: 0.1,Unnamed: 0,class_id,lego_ids,lego_names,minifigure_name
0,0,1,[76115],['Spider Mech vs. Venom'],SPIDER-MAN
1,1,2,[76115],['Spider Mech vs. Venom'],VENOM
2,2,3,[76115],['Spider Mech vs. Venom'],AUNT MAY
3,3,4,[76115],['Spider Mech vs. Venom'],GHOST SPIDER
4,4,5,[75208],"[""Yoda's Hut""]",YODA


In [137]:
# Clean data
metadata_df['lego_ids'] = metadata_df.apply(lambda x: x['lego_ids'].replace('[', '').replace(']', ''),axis=1)
metadata_df.head()

Unnamed: 0.1,Unnamed: 0,class_id,lego_ids,lego_names,minifigure_name
0,0,1,76115,['Spider Mech vs. Venom'],SPIDER-MAN
1,1,2,76115,['Spider Mech vs. Venom'],VENOM
2,2,3,76115,['Spider Mech vs. Venom'],AUNT MAY
3,3,4,76115,['Spider Mech vs. Venom'],GHOST SPIDER
4,4,5,75208,"[""Yoda's Hut""]",YODA


In [138]:
# Clean data
metadata_df['lego_names'] = metadata_df.apply(lambda x: x['lego_names'].replace('[', '').replace(']', '').replace("'", "").replace('"', ""),axis=1)
metadata_df.head()

Unnamed: 0.1,Unnamed: 0,class_id,lego_ids,lego_names,minifigure_name
0,0,1,76115,Spider Mech vs. Venom,SPIDER-MAN
1,1,2,76115,Spider Mech vs. Venom,VENOM
2,2,3,76115,Spider Mech vs. Venom,AUNT MAY
3,3,4,76115,Spider Mech vs. Venom,GHOST SPIDER
4,4,5,75208,Yodas Hut,YODA


# Transform dataFrames

In [139]:
index_cols = ["Unnamed: 0", "path", "class_id"]
index_transformed = index_df[index_cols].copy()
# Rename the column headers
index_transformed = index_transformed.rename(columns={"Unnamed: 0": "image_id",
                                                      "path": "image_path"})
# Set index
#index_transformed.set_index("image_id", inplace=True)
index_transformed.head()

Unnamed: 0,image_id,image_path,class_id
0,0,marvel/0001/001.jpg,1
1,1,marvel/0001/002.jpg,1
2,2,marvel/0001/003.jpg,1
3,3,marvel/0001/004.jpg,1
4,4,marvel/0001/005.jpg,1


In [140]:
metadata_cols = ["class_id", "lego_ids", "lego_names", "minifigure_name"]
metadata_transformed = metadata_df[metadata_cols].copy()
# Rename the column headers
metadata_transformed = metadata_transformed.rename(columns={"lego_ids": "lego_id",
                                                            "lego_names": "lego_name"})
# Set index
#metadata_transformed.set_index("class_id", inplace=True)
metadata_transformed.head()

Unnamed: 0,class_id,lego_id,lego_name,minifigure_name
0,1,76115,Spider Mech vs. Venom,SPIDER-MAN
1,2,76115,Spider Mech vs. Venom,VENOM
2,3,76115,Spider Mech vs. Venom,AUNT MAY
3,4,76115,Spider Mech vs. Venom,GHOST SPIDER
4,5,75208,Yodas Hut,YODA


# Create database connection

In [141]:
# Setup connection to mongodb
conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(conn)

# Select database and collection to use
db = client.legoDB
legoCT = db.legoCT
legoIMG = db.legoIMG

In [142]:
db.legoCT.drop()
db.legoIMG.drop()

# Load DataFrames into database

In [143]:
legoCT.insert_many(metadata_transformed.to_dict('records'))

<pymongo.results.InsertManyResult at 0x7fdc39cb0640>

In [144]:
legoIMG.insert_many(index_transformed.to_dict('records'))

<pymongo.results.InsertManyResult at 0x7fdc3c2e4910>

In [147]:
collection = legoCT.find({'minifigure_name': 'SPIDER-MAN'})

In [148]:
for i in lego_images:
    print(i)

{'_id': ObjectId('5f598e7e19f4bbb655f0a5bd'), 'image_id': 0, 'image_path': 'marvel/0001/001.jpg', 'class_id': 1}
{'_id': ObjectId('5f598e7e19f4bbb655f0a5be'), 'image_id': 1, 'image_path': 'marvel/0001/002.jpg', 'class_id': 1}
{'_id': ObjectId('5f598e7e19f4bbb655f0a5bf'), 'image_id': 2, 'image_path': 'marvel/0001/003.jpg', 'class_id': 1}
{'_id': ObjectId('5f598e7e19f4bbb655f0a5c0'), 'image_id': 3, 'image_path': 'marvel/0001/004.jpg', 'class_id': 1}
{'_id': ObjectId('5f598e7e19f4bbb655f0a5c1'), 'image_id': 4, 'image_path': 'marvel/0001/005.jpg', 'class_id': 1}
{'_id': ObjectId('5f598e7e19f4bbb655f0a5c2'), 'image_id': 5, 'image_path': 'marvel/0001/006.jpg', 'class_id': 1}
{'_id': ObjectId('5f598e7e19f4bbb655f0a5c3'), 'image_id': 6, 'image_path': 'marvel/0001/007.jpg', 'class_id': 1}
{'_id': ObjectId('5f598e7e19f4bbb655f0a5c4'), 'image_id': 7, 'image_path': 'marvel/0001/008.jpg', 'class_id': 1}
{'_id': ObjectId('5f598e7e19f4bbb655f0a5c5'), 'image_id': 8, 'image_path': 'marvel/0001/009.jpg'

In [150]:
for i in legoCT.find({'minifigure_name': 'SPIDER-MAN'}):
    print(i)

{'_id': ObjectId('5f598e7c19f4bbb655f0a5aa'), 'class_id': 1, 'lego_id': '76115', 'lego_name': 'Spider Mech vs. Venom', 'minifigure_name': 'SPIDER-MAN'}
{'_id': ObjectId('5f598e7c19f4bbb655f0a5ba'), 'class_id': 17, 'lego_id': '76128', 'lego_name': 'Molten Man Battle', 'minifigure_name': 'SPIDER-MAN'}


In [151]:
for i in legoCT.find({}):
    print(i)

{'_id': ObjectId('5f598e7c19f4bbb655f0a5aa'), 'class_id': 1, 'lego_id': '76115', 'lego_name': 'Spider Mech vs. Venom', 'minifigure_name': 'SPIDER-MAN'}
{'_id': ObjectId('5f598e7c19f4bbb655f0a5ab'), 'class_id': 2, 'lego_id': '76115', 'lego_name': 'Spider Mech vs. Venom', 'minifigure_name': 'VENOM'}
{'_id': ObjectId('5f598e7c19f4bbb655f0a5ac'), 'class_id': 3, 'lego_id': '76115', 'lego_name': 'Spider Mech vs. Venom', 'minifigure_name': 'AUNT MAY'}
{'_id': ObjectId('5f598e7c19f4bbb655f0a5ad'), 'class_id': 4, 'lego_id': '76115', 'lego_name': 'Spider Mech vs. Venom', 'minifigure_name': 'GHOST SPIDER'}
{'_id': ObjectId('5f598e7c19f4bbb655f0a5ae'), 'class_id': 5, 'lego_id': '75208', 'lego_name': 'Yodas Hut', 'minifigure_name': 'YODA'}
{'_id': ObjectId('5f598e7c19f4bbb655f0a5af'), 'class_id': 6, 'lego_id': '75208', 'lego_name': 'Yodas Hut', 'minifigure_name': 'LUKE SKYWALKER'}
{'_id': ObjectId('5f598e7c19f4bbb655f0a5b0'), 'class_id': 7, 'lego_id': '75208', 'lego_name': 'Yodas Hut', 'minifigure_