# Import of Python libraries


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.cluster
from PIL import Image
import math

from sklearn.preprocessing import LabelEncoder
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier ,export_graphviz

from IPython.display import display
import random


## 1. Creation of the color bank


In [None]:

# Load the color bank from the tab-separated palette file.
names = ["Color name", "RGB", "HEX"]
lines = []
with open("./colorPalette.csv", "r") as file:
    # The first line is skipped (header); every other line is split on tabs
    # and its second field, an RGB triple written between parentheses, is
    # replaced by a tuple of three integers.
    for rawLine in file.read().splitlines()[1:]:
        fields = rawLine.split("\t")
        channels = fields[1].split(",")
        fields[1] = (
            int(channels[0].split("(")[1]),
            int(channels[1]),
            int(channels[2].split(")")[0]),
        )
        lines.append(fields)
colorBank = pd.DataFrame(lines, columns=names)

In [None]:
# Normalize to the nearest color from our color bank to limit the number of color nuances (almost infinite with our k-means method)
def distanceCalcul(color1, color2):
    """Return the Euclidean distance between two colors.

    Only the first three components (indices 0, 1 and 2) of each color are
    read; any further component is ignored.
    """
    deltaFirst = color2[0] - color1[0]
    deltaSecond = color2[1] - color1[1]
    deltaThird = color2[2] - color1[2]
    return math.sqrt(deltaFirst**2 + deltaSecond**2 + deltaThird**2)

def colorNormalisation(paletteColor: pd.DataFrame , color : tuple) -> str:
    """Return the name of the palette color closest to ``color``.

    Args:
        paletteColor: frame with a "Color name" column and an "RGB" column
            holding one 3-component color per row.
        color: the color to normalize (indexable, at least 3 components).

    Returns:
        The "Color name" of the row whose "RGB" value has the smallest
        Euclidean distance to ``color``; the first such row wins on ties.

    Raises:
        ValueError: if the palette is empty, or if no row is at a finite
            distance from ``color`` (for example when ``color`` holds NaN).
    """
    if paletteColor.empty:
        raise ValueError("paletteColor must contain at least one color")

    distanceMin = math.inf
    colordistancemin = None

    # Walk the two columns in lockstep (positional), so the lookup does not
    # depend on the frame having a unique index.
    for colorName, rgb in zip(paletteColor["Color name"], paletteColor["RGB"]):
        distance = distanceCalcul(rgb, color)
        if distance < distanceMin:
            distanceMin = distance
            colordistancemin = colorName

    if colordistancemin is None:
        raise ValueError("no palette color is at a finite distance from {}".format(color))

    return colordistancemin

# print(colorNormalisation(colorPalette, (44, 143,163)))


# 2. Preparation of the dataset Images


## Extraction of the image files


In [None]:
rootDirectory = "./flowers"
# Every image lives at rootDirectory / sizeFolder / train / plantName / file,
# for example ./flowers/jpeg-192x192/train/alpine sea holly/11331.jpeg

# Limit the number of images kept per plant type and per size folder, for the
# needs of the test (1040 images out of 12000).
maxImagesPerType = 10

if not os.path.isdir(rootDirectory):
    raise FileNotFoundError(
        "image root directory {!r} was not found".format(rootDirectory)
    )

# Only the first level of the tree is needed here, so take the first item
# produced by os.walk instead of walking every sub-folder.
sizeFolders = next(os.walk(rootDirectory))[1]
print(sizeFolders)

informationsOnImages = []
for sizeFolder in sizeFolders:
    trainDirectory = "{}/{}/train".format(rootDirectory, sizeFolder)
    if not os.path.isdir(trainDirectory):
        # Not an image-size folder (no train split): nothing to collect.
        continue

    # The folder name ends with "<width>x<height>" (e.g. "jpeg-192x192");
    # the number after the last "x" is stored as the image size.
    imageSize = int(sizeFolder.rsplit("x", 1)[-1])

    # First level under train/ = one folder per plant name.
    plantNames = next(os.walk(trainDirectory))[1]
    for name in plantNames:
        plantDirectory = "{}/{}".format(trainDirectory, name)
        # List the files of this plant folder only, so that names and files
        # can never get out of step.
        files = next(os.walk(plantDirectory), (plantDirectory, [], []))[2]
        for image in files[:maxImagesPerType]:
            informationsOnImages.append(
                {
                    "size": imageSize,
                    "name": name,
                    "file": image,
                    "color1": "",
                    "color2": "",
                    "color3": "",
                }
            )

datasetImages = pd.DataFrame(informationsOnImages)

# datasetImages


## Extraction of principal colors


In [None]:

# Number of color clusters computed per image; must be at least 3 because the
# three largest clusters are stored below.
nb_cluster = 5
for index, row in datasetImages.iterrows():

    imagePath = "{root}/jpeg-{size}x{size}/train/{type}/{file}".format(
        root = rootDirectory,
        size = row["size"],
        type = row["name"],
        file = row["file"]
    )

    # The context manager closes the file even if the conversion fails.
    with Image.open(imagePath) as imgfile:
        # Force 3 channels per pixel: grayscale or CMYK files would otherwise
        # give an array that is not (pixels, 3).
        numarray = np.asarray(imgfile.convert("RGB"), dtype=np.float64).reshape(-1, 3)

    # extraction of the nb_cluster main colors
    clusters = sklearn.cluster.MiniBatchKMeans(n_clusters=nb_cluster)
    clusters.fit(numarray)

    # ordering the colors: count the pixels assigned to each cluster, then
    # sort the cluster indices by ascending count (largest cluster last)
    clusterSizes = np.bincount(clusters.labels_, minlength=nb_cluster)
    indicesMax = clusterSizes.argsort()
    palette = clusters.cluster_centers_

    # add the 3 main colors, normalized to the color bank, into datasetImages
    for rank, column in enumerate(("color1", "color2", "color3"), start=1):
        datasetImages.loc[index, column] = colorNormalisation(colorBank, palette[indicesMax[-rank]])

       
# datasetImages.head(15)

## Save the dataset to a JSON file to avoid re-running K-means each time


In [None]:
# Save the dataframe (including the color1/color2/color3 columns filled in
# above) to ./data.json so MiniBatchKMeans does not have to be executed again.
datasetImages.to_json(path_or_buf="./data.json", orient="index")

# 3. Creation of the classes


## To load the Dataset from json file


In [None]:
# Reload the dataframe, with its colors, from ./data.json (written with the
# same orient="index" layout), then print it as a quick visual check.
datasetImages = pd.read_json(path_or_buf="./data.json", orient="index")
print(datasetImages)

# Creation of the user class


In [None]:
class user:
    """A user of the recommendation system.

    Stores the user's age, sex and preferred color, the history of images the
    user liked / did not like, and a decision tree trained on that history to
    predict whether the user will like an image.
    """

    def __init__(self, age = None, sex = None, preferredColor=None):
        """Create a user.

        Two ways to create a user:
        - real user: every value left to ``None`` is asked interactively;
        - fake user (testing process): the values are passed as arguments.
        """
        self.__age = self.setAge() if age is None else age
        self.__sex = self.setSex() if sex is None else sex
        self.__preferredColor = (
            self.setpreferredColor() if preferredColor is None else preferredColor
        )

        self.__likedImages = pd.DataFrame()
        self.__unlikedImages = pd.DataFrame()
        self.__decisionTree = tree.DecisionTreeClassifier()

    def setAge(self):
        """Ask for the age until a whole number is typed and return it as an int."""
        while True:
            answer = input("What is your age: ")
            # isdecimal() only accepts characters int() can parse, unlike
            # isnumeric() which also accepts e.g. fractions and superscripts.
            if answer.isdecimal():
                return int(answer)

    def setSex(self):
        """Ask for the sex until "M" or "F" is typed (any case) and return it upper-cased."""
        sex = ""
        while sex not in ("M", "F"):
            sex = input("Please select sex (M/F):").upper()
        return sex

    def setpreferredColor(self):
        """Ask for a color of the color bank and return its lower-cased name."""
        # user can select his preferred color in our colorBank
        print(colorBank["Color name"])
        # Built once: the set of accepted answers does not change between prompts.
        knownColors = {name.lower() for name in colorBank["Color name"].values}
        preferredColor = ""
        while preferredColor not in knownColors:
            preferredColor = input("Please select your preferred color in the list above").lower()

        return preferredColor

    @staticmethod
    def _extendHistory(history: pd.DataFrame, newRows: pd.DataFrame) -> pd.DataFrame:
        """Return ``history`` followed by ``newRows``, skipping empty frames."""
        if history.empty:
            return newRows
        if newRows.empty:
            return history
        return pd.concat([history, newRows])

    def addImagesHistory(self, images:pd.DataFrame, notation:list):
        """Record the user's answers for a batch of images.

        Args:
            images: the images shown to the user, one per row.
            notation: one answer per row of ``images``, in the same order;
                "y" means liked, anything else means not liked.
        """
        images = images.reset_index(drop=True)

        # One boolean per image row; notation is read by position.
        likedMask = pd.Series(
            [notation[position] == "y" for position in range(len(images))],
            index=images.index,
            dtype=bool,
        )

        # The "like" column is the training target: 1 for liked, 0 otherwise.
        self.__likedImages = self._extendHistory(
            self.__likedImages, images[likedMask].assign(like=1)
        )
        self.__unlikedImages = self._extendHistory(
            self.__unlikedImages, images[~likedMask].assign(like=0)
        )

    def getAge(self):
        """Return the user's age."""
        return self.__age

    def getSex(self):
        """Return the user's sex."""
        return self.__sex

    def getPreferredColor(self):
        """Return the user's preferred color."""
        return self.__preferredColor

    def getLikedImages(self):
        """Return the liked images without the internal "like" column."""
        # errors="ignore": the column does not exist before the first rating.
        return self.__likedImages.drop(columns=["like"], errors="ignore")

    def getUnlikedImages(self):
        """Return the not-liked images without the internal "like" column."""
        return self.__unlikedImages.drop(columns=["like"], errors="ignore")

    def trainDecisionTree(self):
        """Fit the decision tree on every image the user has rated so far.

        Raises:
            ValueError: if the user has not rated any image yet.
        """
        # creation of the train set with the history of all images viewed by the user
        histories = [
            history
            for history in (self.__likedImages, self.__unlikedImages)
            if not history.empty
        ]
        if not histories:
            raise ValueError(
                "cannot train the decision tree: the user has not rated any image yet"
            )
        trainImages = pd.concat(histories, ignore_index=True)

        # shuffle the train set
        trainImages = trainImages.iloc[np.random.permutation(len(trainImages))]

        # get the result from the train set (1 or 0 for liked or disliked)
        result = trainImages["like"]

        # drop the columns that do not count in the fitting
        trainImages = trainImages.drop(["file", "like"], axis=1)
        trainImages = trainImages.reset_index(drop=True)

        # fitting
        self.__decisionTree = self.__decisionTree.fit(trainImages, result)

    def printDecisionTree(self):
        """Plot the user's decision tree."""
        tree.plot_tree(self.__decisionTree)

    def predictLikedImages(self, images):
        """Return the decision tree's prediction (liked or not) for each image."""
        return self.__decisionTree.predict(images)




# Creation of the recommendation System


In [None]:
class recommendationSystem:
    """Image recommendation system built on per-user decision trees.

    Holds the image dataset (raw and label-encoded), the label encoders used
    to go from one form to the other, and the list of known users.
    """

    # Columns of the image dataset that are label-encoded.
    _ENCODED_COLUMNS = ("name", "size", "color1", "color2", "color3")

    def __init__(self, Images: pd.DataFrame):
        """Store ``Images`` and build its label-encoded copy."""
        self.listUser = pd.DataFrame()
        self.datasetImages = Images
        self.LabelEncoder = self.createLabelEncoder()
        self.datasetImagesEncoded = self.fitEncodeLabels(self.datasetImages.copy())
        self.fitEncodeResults(["n", "y"])

    ## functions to encode and decode the pd.DataFrame
    def createLabelEncoder(self) -> dict:
        """Return one fresh LabelEncoder per encoded column, plus one for "result"."""
        return {key: LabelEncoder() for key in (*self._ENCODED_COLUMNS, "result")}

    def fitEncodeLabels(self, sets:pd.DataFrame) -> pd.DataFrame:
        """Fit the encoders on ``sets`` and encode it in place; return ``sets``."""
        for column in self._ENCODED_COLUMNS:
            sets[column] = self.LabelEncoder[column].fit_transform(sets[column])

        return sets

    def fitEncodeResults(self, result: list) -> None:
        """Fit the "result" encoder on the possible answers."""
        self.LabelEncoder["result"].fit(result)

    def encodeLabels(self, sets:pd.DataFrame) -> pd.DataFrame:
        """Encode ``sets`` in place with the already fitted encoders; return ``sets``."""
        for column in self._ENCODED_COLUMNS:
            sets[column] = self.LabelEncoder[column].transform(sets[column])

        return sets

    def decodeLabels(self,sets:pd.DataFrame ) -> pd.DataFrame:
        """Decode ``sets`` in place back to the original labels; return ``sets``."""
        for column in self._ENCODED_COLUMNS:
            # Encoded values are positions in the encoder's class list; cast to
            # int in case the column was turned into floats along the way.
            codes = sets[column].astype(int)
            sets[column] = self.LabelEncoder[column].inverse_transform(codes)

        return sets

    def encodeResults(self, result: list) -> list:
        """Encode a list of answers with the "result" encoder."""
        return self.LabelEncoder["result"].transform(result)

    def decodeResults(self, result:list) -> list:
        """Decode a list of encoded answers with the "result" encoder."""
        return self.LabelEncoder["result"].inverse_transform(result)

    @staticmethod
    def _concatFrames(frames) -> pd.DataFrame:
        """Concatenate the non-empty frames; return an empty frame if there are none."""
        nonEmpty = [frame for frame in frames if not frame.empty]
        if not nonEmpty:
            return pd.DataFrame()
        return pd.concat(nonEmpty)

    def addUserToList(self, newUser: "user"):
        """Add a user, with its age, sex and preferred color, to the user list."""
        newRow = pd.DataFrame([
                {
                    "user": newUser,
                    "age": newUser.getAge(),
                    "sex": newUser.getSex(),
                    "color": newUser.getPreferredColor()
                }
            ])
        if self.listUser.empty:
            self.listUser = newRow
        else:
            self.listUser = pd.concat([self.listUser, newRow], ignore_index=True)

    def createSpecialUser(self, numberOfSimulatedImages:int) -> "user":
        """Create a fake user with random attributes and random answers.

        Args:
            numberOfSimulatedImages: number of dataset images the fake user
                randomly rates "y" or "n".

        Returns:
            The new user, already registered and with a trained decision tree.
        """
        # creation of the user
        age = random.randint(10, 90)
        sex = random.choice(["M", "F"])
        preferredColor = random.choice(colorBank["Color name"].tolist())
        newUser = user(age=age, sex=sex, preferredColor=preferredColor)
        self.addUserToList(newUser=newUser)

        # simulate the choice of numberOfSimulatedImages images
        imageslist = self.datasetImagesEncoded.sample(numberOfSimulatedImages)
        result = random.choices(["y", "n"] , k=numberOfSimulatedImages)

        # add all these images to the history of the user
        newUser.addImagesHistory(imageslist, result)
        # train the decision tree
        newUser.trainDecisionTree()
        return newUser

    def createUser(self, age=None, sex=None, preferredColor=None) -> "user":
        """Create a user, show a first round of images and register the user.

        Args:
            age, sex, preferredColor: passed to the ``user`` constructor; any
                value left to ``None`` is asked interactively.
        """
        newUser = user(age=age, sex=sex, preferredColor=preferredColor)
        # ask the user a first round of images
        self.askFirstImages(newUser)
        # add the user to our database of users
        self.addUserToList(newUser=newUser)
        return newUser

    def getImages(self, imageslist, client: "user") -> None:
        """Show each image to the user, record the answers and retrain the tree.

        Args:
            imageslist: decoded images (real size / name / file values).
            client: the user rating the images.
        """
        if imageslist.empty:
            # Nothing to show, so nothing to learn from.
            return

        rootDirectory = "./flowers"
        result = []
        for _, image in imageslist.iterrows():
            imagePath = "{root}/jpeg-{size}x{size}/train/{type}/{file}".format(
                root = rootDirectory,
                # int(): a float size (224.0) would give a path that does not exist
                size = int(image["size"]),
                type = image["name"],
                file = image["file"]
            )

            # opening each image to propose to the user; closed even on error
            with Image.open(imagePath) as imgfile:
                display(imgfile)
                # get the user response on the image (force the response format)
                userLike = ""
                while userLike not in ("y", "n"):
                    userLike = input("Do you like this image? (y/n)").lower()
            result.append(userLike)

        # encode a copy of the images to give to the fitting function
        encodedImages = self.encodeLabels(imageslist.copy())
        # add all these images to the history of the user
        client.addImagesHistory(encodedImages, result)
        # train the decision tree
        client.trainDecisionTree()

    def _selectFirstImages(self, client: "user", count: int = 15) -> pd.DataFrame:
        """Return up to ``count`` encoded images for a user's first round.

        Images liked by registered users of the same sex and less than 5 years
        apart in age are used when there are any; otherwise the images are
        drawn at random from the whole dataset.
        """
        # could be better to do a collaborative filtering instead of selecting
        # directly on age and sex, but real data are needed for this kind of
        # ML: random data would give absurd values
        candidates = pd.DataFrame()
        if not self.listUser.empty:
            closeInAge = (self.listUser["age"] - client.getAge()).abs() < 5
            sameSex = self.listUser["sex"] == client.getSex()
            similarUsers = self.listUser.loc[closeInAge & sameSex, "user"]
            candidates = self._concatFrames(
                [similarUser.getLikedImages() for similarUser in similarUsers]
            )
            if not candidates.empty:
                # the same image can be liked by several similar users
                candidates = candidates.drop_duplicates()

        if candidates.empty:
            candidates = self.datasetImagesEncoded

        return candidates.sample(min(count, len(candidates)))

    def askFirstImages(self, client: "user"):
        """Show a first sample of images to a new user and learn from the answers."""
        firstImages = self._selectFirstImages(client)

        # decoding the table to be able to get the path
        firstImages = self.decodeLabels(firstImages)

        # calling the display function to get user preferences
        self.getImages(imageslist=firstImages, client=client)

    def askNewImages(self, client: "user", numberOfImagesMax: int = 20) -> None:
        """Show the user new images chosen with the user's decision tree.

        Args:
            client: the user asking for images (tree already trained).
            numberOfImagesMax: number of dataset images drawn as candidates
                (capped to the dataset size); at most 10 of them are shown.
        """
        sampleSize = min(numberOfImagesMax, len(self.datasetImagesEncoded))

        # try 100 times to get some images that are predicted to be liked; if
        # it never works we don't have enough data, so default images are used
        for _ in range(100):
            newImageset = self.datasetImagesEncoded.sample(sampleSize, ignore_index=True)

            # submitting the sample to the decision tree to know if the user
            # would like those images
            predictedset = np.asarray(
                client.predictLikedImages(newImageset.drop(["file"], axis=1))
            ).astype(bool)

            # images fitting the user preferences were found: quit the loop
            if predictedset.any():
                break

        # decoding the dataset of images to get the real path
        newImageset = self.decodeLabels(newImageset)

        if predictedset.any():
            # separation of the images labeled as likeable and dislikeable
            imagesPredictedLike = newImageset[predictedset]
            imagesPredictedDislike = newImageset[~predictedset]

            # adding some images labeled as unliked to keep bias
            imagesPredictedLike = self._concatFrames(
                [imagesPredictedLike, imagesPredictedDislike.sample(frac=0.1)]
            )
        else:  # getting default images
            imagesPredictedLike = newImageset

        # the size is used to build the image path, so it must be an int
        imagesPredictedLike = imagesPredictedLike.copy()
        imagesPredictedLike["size"] = imagesPredictedLike["size"].astype(int)

        # limit the sample to 10 to avoid presenting too many images at once
        if len(imagesPredictedLike) > 10:
            imagesPredictedLike = imagesPredictedLike.sample(10)

        # calling the display function to get user preferences
        self.getImages(imageslist=imagesPredictedLike, client=client)

    


## TODO: write the report
        

# 4. Use of the system

## creation of an instance 

In [None]:
# Build the recommendation system on a copy of the image dataset.
flowers = recommendationSystem(Images=datasetImages.copy())

## Creation of a user

In [None]:
# Create a user with preset values instead of interactive prompts.
# NOTE(review): this call needs createUser to accept the age / sex /
# preferredColor keyword arguments - confirm against the class definition.
newuser = flowers.createUser(age=20, sex="M", preferredColor="red")

# method for a user to ask for new images

In [None]:
# Draw 20 candidate images from the dataset; the images kept for the user are
# then shown and rated one by one.
flowers.askNewImages(newuser, numberOfImagesMax=20)

# 5. Simulation

## Fake users to fill the database

In [None]:
# Start again from a fresh recommendation system on its own copy of the dataset
flowers = recommendationSystem(Images = datasetImages.copy())

## creation of fake users, each one rating 10 simulated images
nbFakeUser = 10
listFUsers = [
    flowers.createSpecialUser(numberOfSimulatedImages=10)
    for _ in range(nbFakeUser)
]

print(flowers.listUser)

In [None]:
# Plot the decision tree of the fourth fake user.
# graphviz did not work on Windows, so the tree is plotted as is, without the
# decoded names.
listFUsers[3].printDecisionTree()  

## Real user

In [None]:
# Create a real user: no values are passed to createUser here.
realUser = flowers.createUser()

In [None]:
# Draw 20 candidate images from the dataset; the images kept for the user are
# then shown and rated one by one.
flowers.askNewImages(realUser, numberOfImagesMax=20)

In [None]:
# Plot the decision tree of the real user.
realUser.printDecisionTree()

# 6. Data Visualisation
