# Project CHRETIEN Tristan | DURAND Victor

## Installation

- Install the Kaggle client with pip: `pip3 install kaggle`
- Go to https://kaggle.com
- Create an account
- Generate an API key from your account tab
- Download the `kaggle.json` file and store it at `/home/${USER}/.kaggle/kaggle.json`


## Download dataset

In [None]:
# Fetch the Pokemon images/types dataset from Kaggle and unpack it locally.
import kaggle

# Authenticate against the Kaggle API before requesting any download.
kaggle.api.authenticate()

# Folder that receives the unzipped dataset files.
directory = "./"
kaggle.api.dataset_download_files(
    'vishalsubbiah/pokemon-images-and-types',
    path=directory,
    unzip=True,
)


## Generate file metadata

In [None]:
import os
from PIL import Image
from PIL.ExifTags import TAGS
import pandas as pd
import json
import numpy as np
import math
from sklearn.cluster import KMeans


def kmens_img(image, nb_color):
    """Cluster the pixels of ``image`` into ``nb_color`` dominant colors.

    The pixel values returned by ``image.getdata()`` are clustered with
    K-means. Each cluster center is turned into a ``#rrggbb`` string built
    from its first three components (rounded up).

    Args:
        image: Object exposing ``getdata()`` that yields per-pixel channel
            values (presumably a PIL image with at least three channels --
            confirm against callers).
        nb_color: Number of clusters / colors to extract.

    Returns:
        A ``(histogram, color_list)`` pair. ``histogram`` holds the number of
        pixels per cluster in descending order and ``color_list`` holds the
        matching hex colors, so ``histogram[i]`` is the pixel count of
        ``color_list[i]``.
    """
    pixels = np.array(image.getdata(), np.uint8)
    clusters = KMeans(n_clusters=nb_color)
    clusters.fit(pixels)

    # One bin per cluster label: counts[i] is the population of cluster i.
    npbins = np.arange(0, nb_color + 1)
    counts = np.histogram(clusters.labels_, bins=npbins)[0]

    # Rank the cluster indexes by population so that counts and colors are
    # reordered together; sorting only the counts would break the pairing
    # between a count and the color it describes.
    ranking = sorted(range(nb_color), key=lambda i: counts[i], reverse=True)

    histogram = []
    color_list = []
    for i in ranking:
        center = clusters.cluster_centers_[i]
        histogram.append(int(counts[i]))
        color_list.append(
            '#%02x%02x%02x' % (
                math.ceil(center[0]),
                math.ceil(center[1]),
                math.ceil(center[2]),
            )
        )

    return histogram, color_list

# Build one metadata record per image file and dump them all to a JSON file.
# The CSV is read without its header row, so columns are addressed by
# position: 0 is used as the name, 1 as type1 and 2 as type2.
df = pd.read_csv('images/pokemon.csv', sep=',', header=None, skiprows=1)
# Missing cells (e.g. no secondary type) are stored as the string "None".
# The result must be assigned: fillna/replace return a new frame.
df = df.fillna("None")

json_data = []
# Sorted so that ids are reproducible; os.listdir order is arbitrary.
for filename in sorted(os.listdir("images/images/")):
    f = "images/images/" + filename
    # splitext keeps names that themselves contain a dot intact.
    stem = os.path.splitext(filename)[0]
    metadata = df.loc[df[0] == stem]
    if metadata.empty:
        # No CSV row for this file: skip it instead of failing on row access.
        print(f"Skipping {f}: no matching row in images/pokemon.csv")
        continue

    # Only the dimensions are needed; close the file handle right away.
    with Image.open(f) as image:
        size = image.size

    row = metadata.iloc[0]
    json_data.append({
        # Ids are 1-based and only count the images actually recorded.
        "id": len(json_data) + 1,
        "properties": {
            "name": row[0],
            "type1": row[1],
            "type2": row[2],
        },
        "size": size,
        # Placeholders: no color or tag data is computed in this cell.
        "colors": '',
        "tags": [],
        "path": f,
    })

# The output folder is not part of the downloaded dataset; create it if needed.
os.makedirs("images/metadata", exist_ok=True)
with open("images/metadata/metadata.json", 'w') as outfile:
    json.dump(json_data, outfile)

## Generating 10 users with image preferences

In [None]:
import random
import json

# Load the image records written by the metadata generation step.
with open("images/metadata/metadata.json", 'r') as infos_file:
    images_infos = json.load(infos_file)
print(images_infos[0])

users_preferences = []

number_of_users = 10
# Number of images placed in each of the favorites / dislikes lists.
picks_per_list = 8

for user_id in range(number_of_users):
    # Draw all indexes in one sample without replacement: this guarantees
    # that favorites and dislikes are disjoint and contain no duplicates.
    # The range starts at 0 so the first image can be picked as well.
    # Values are 0-based positions in images_infos (not the 1-based "id").
    drawn = random.sample(range(len(images_infos)), 2 * picks_per_list)
    favorites_index = drawn[:picks_per_list]
    dislike_index = drawn[picks_per_list:]

    # Distinct (type1, type2) pairs, in first-seen order so the output is
    # stable for a given draw (a set would shuffle them between runs).
    favorites_types = list(dict.fromkeys(
        (images_infos[index]["properties"]["type1"],
         images_infos[index]["properties"]["type2"])
        for index in favorites_index
    ))
    disliked_types = list(dict.fromkeys(
        (images_infos[index]["properties"]["type1"],
         images_infos[index]["properties"]["type2"])
        for index in dislike_index
    ))

    user_metadata = {
        "id": user_id + 1,
        "favorites": favorites_index,
        "dislikes": dislike_index,
        "favorites_types": favorites_types,
        # Key previously carried a stray "^" prefix.
        "disliked_types": disliked_types,
    }
    users_preferences.append(user_metadata)

with open("images/metadata/users_preferences.json", 'w') as outfile:
    json.dump(users_preferences, outfile)

## Data visualization (statistics)

In [None]:
import json
import pandas as pd

# Load the image records written by the metadata generation step.
with open('images/metadata/metadata.json', 'r') as f:
    data = json.load(f)

# The file holds a flat list of records, so no record_path/meta is needed:
# json_normalize flattens the nested "properties" dict into dotted columns
# ("properties.name", "properties.type1", "properties.type2").
df = pd.json_normalize(data)

# `values` is an attribute (a NumPy array), not a method.
df.values


