In [1]:
import math
import random
from itertools import combinations as comb

import concepts
import numpy as np
import pandas as pd
from pytictoc import TicToc

In [51]:
def typeCompression(df):
    """Downcast every float64 column of ``df`` to float32.

    The frame is modified in place (columns are reassigned on the object that
    was passed in) and the same object is returned for convenience. Columns of
    any other dtype are left untouched.
    """
    # Decide which columns to convert before touching any of them.
    wide_floats = [name for name in df.columns if df[name].dtype == 'float64']
    for name in wide_floats:
        df[name] = df[name].astype('float32')
    return df

def sparsityReducer(df, quantile=0.2, verbose=True):
    """Drop the least-active users and the least-rated items from a ratings frame.

    Rating counts are computed per ``User`` and per ``Item`` on the frame as
    passed in. Users and items whose count is strictly below the given quantile
    of their respective count distribution are removed.

    Args:
        df: DataFrame with ``User``, ``Item`` and ``Rating`` columns.
        quantile: quantile of the count distribution used as the cut-off for
            both users and items. Defaults to 0.2, the value that was
            previously hard-coded. pandas raises ``ValueError`` if it is
            outside ``[0, 1]``.
        verbose: when True (default) print the per-user statistics, the dropped
            labels and the before/after shapes, as the function always did.

    Returns:
        A filtered DataFrame; ``df`` itself is not modified.
    """
    user_df = df.groupby('User')['Rating'].agg(['count', 'mean'])
    item_df = df.groupby('Item')['Rating'].agg(['count', 'mean'])

    if verbose:
        print(user_df)

    user_threshold = user_df['count'].quantile(quantile)
    item_threshold = item_df['count'].quantile(quantile)

    drop_user = user_df[user_df['count'] < user_threshold].index
    drop_item = item_df[item_df['count'] < item_threshold].index

    if verbose:
        print(drop_user, drop_item)
        print('Original Shape: {}'.format(df.shape))

    # Both drop lists come from the unfiltered frame, so one combined mask is
    # equivalent to filtering items first and users second.
    keep = ~df['Item'].isin(drop_item) & ~df['User'].isin(drop_user)
    trimmed = df[keep]

    if verbose:
        print('After Trim Shape: {}'.format(trimmed.shape))

    return trimmed

def sparseChanger(df, num):
    """Filter users by how many ratings they have, using ``num`` as the bound.

    Args:
        df: DataFrame with ``User`` and ``Rating`` columns.
        num: rating-count threshold. ``num == 50`` is the lower-bound mode:
            users with fewer than 50 ratings are dropped. Any other value is
            an upper bound: users with more than ``num`` ratings are dropped.
            (The upper bound used to be fixed at 800 regardless of ``num``;
            ``sparseChanger(df, 800)`` behaves exactly as before.)

    Returns:
        A filtered DataFrame; ``df`` itself is not modified. The shapes before
        and after filtering are printed.
    """
    rating_counts = df.groupby('User')['Rating'].count()

    if num == 50:
        drop_user = rating_counts[rating_counts < num].index
    else:
        drop_user = rating_counts[rating_counts > num].index

    print('Original Shape: {}'.format(df.shape))
    df = df[~df['User'].isin(drop_user)]
    print('After Trim Shape: {}'.format(df.shape))

    return df

def generateConcepts(filename, nrows=100, ncols=10):
    """Build the formal-concept lattice for the top-left corner of a user/item CSV.

    Args:
        filename: CSV with a ``User`` column followed by one column per item;
            item column labels must be integer-like strings.
        nrows: number of data rows to read (default 100, the former constant).
        ncols: number of leading CSV columns to read, including ``User``
            (default 10, the former constant). Must leave at least one item
            column, otherwise ``max()`` raises ``ValueError``.

    Returns:
        A list with one ``[extent, [intent]]`` entry per lattice node, where
        ``extent`` and ``intent`` are the first two fields of each entry of
        ``Context.todict()['lattice']``.
    """
    df = pd.read_csv(filename, index_col='User', nrows=nrows, usecols=np.arange(0, ncols))

    # Column labels are strings, so compare them as integers; a plain max()
    # is lexicographic and would rank '9' above '10'.
    max_val = max(int(label) for label in df.columns)
    # Object labels are generated integers placed just above the largest item label.
    objects = list(range(max_val + 1, max_val + df.shape[0] + 1))
    properties = list(df.columns)
    # A cell counts as "has property" when it is neither missing nor zero.
    bools = list(df.fillna(False).astype(bool).itertuples(index=False, name=None))

    context = concepts.Context(objects, properties, bools)

    return [[node[0], [node[1]]] for node in context.todict()['lattice']]

In [10]:
# Read the '::'-delimited ratings file, keeping its first three fields as
# User / Item / Rating, and downcast any float64 columns.
mlens_df = typeCompression(
    pd.read_csv(
        'dataset/movie_lens.dat',
        delimiter='::',
        header=None,
        names=['User', 'Item', 'Rating'],
        usecols=[0, 1, 2],
        engine='python',
    )
)
# Trim users and items whose rating count falls below the 20th percentile.
new_df = sparsityReducer(mlens_df)

      count      mean
User                 
1        53  4.188679
2       129  3.713178
3        51  3.901961
4        21  4.190476
5       198  3.146465
...     ...       ...
6036    888  3.302928
6037    202  3.717822
6038     20  3.800000
6039    123  3.878049
6040    341  3.577713

[6040 rows x 2 columns]
Int64Index([   4,    7,   12,   14,   16,   20,   21,   41,   43,   47,
            ...
            5984, 5993, 5999, 6008, 6012, 6017, 6027, 6029, 6034, 6038],
           dtype='int64', name='User', length=1205) Int64Index([  33,   37,   53,   56,   59,   67,   75,   84,   90,   96,
            ...
            3891, 3892, 3894, 3899, 3904, 3905, 3933, 3940, 3941, 3944],
           dtype='int64', name='Item', length=730)
Original Shape: (1000209, 3)
After Trim Shape: (960887, 3)


In [11]:
# Save the trimmed ratings without the row index; index=False is the
# documented way to omit it (index=None only worked by being falsy).
new_df.to_csv('dataset.csv', index=False)

In [16]:
# Reshape the long ratings table into a User x Item matrix of ratings.
df_pivot = new_df.pivot_table(index='User', columns='Item', values='Rating')

In [34]:
# Persist the User x Item matrix; generateConcepts() is later called on this file.
df_pivot.to_csv('user_item_matrix.csv')

In [35]:
# Preview the first rows of the User x Item matrix (NaN = no rating recorded).
df_pivot.head()

Item,1,2,3,4,5,6,7,8,9,10,...,3942,3943,3945,3946,3947,3948,3949,3950,3951,3952
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,2.0,,,,,...,,,,,,,,,,
6,4.0,,,,,,,,,,...,,,,,,,,,,


In [53]:
# Lookup tables that turn the positional indices returned by generateConcepts()
# back into the user / item labels of the pivot table.
item_lst = list(df_pivot.columns)
user_lst = list(df_pivot.index)

t = TicToc()
t.tic()

# Generate Concepts
concept = generateConcepts('user_item_matrix.csv')

print('No of Concepts Generated : ', len(concept))
# toc() prints the elapsed time itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
t.toc()
print('Concepts List : \n')

# Each concept is [user_positions, [item_positions]]; keep that nesting while
# replacing positions with labels.
final_concept = [
    [[user_lst[u] for u in extent], [[item_lst[i] for i in intent[0]]]]
    for extent, intent in concept
]

final_concept

No of Concepts Generated :  29
Elapsed time is 0.116257 seconds.
None
Concepts List : 



[[[], [[1, 2, 3, 4, 5, 6, 7, 8, 9]]],
 [[26], [[1, 3, 4, 5, 7]]],
 [[48], [[1, 2, 5, 6, 7]]],
 [[62], [[2, 3, 6, 7]]],
 [[73], [[1, 8, 9]]],
 [[92], [[1, 2, 6, 8]]],
 [[8, 26], [[1, 4]]],
 [[10, 48], [[1, 2, 7]]],
 [[26, 45], [[1, 3, 7]]],
 [[26, 48], [[1, 5, 7]]],
 [[36, 48], [[1, 6, 7]]],
 [[48, 62], [[2, 6, 7]]],
 [[65, 73], [[1, 9]]],
 [[10, 48, 62], [[2, 7]]],
 [[23, 48, 92], [[1, 2, 6]]],
 [[26, 45, 62], [[3, 7]]],
 [[36, 48, 62], [[6, 7]]],
 [[65, 73, 103], [[9]]],
 [[75, 92, 119], [[1, 2, 8]]],
 [[23, 48, 62, 92], [[2, 6]]],
 [[73, 75, 92, 119], [[1, 8]]],
 [[23, 36, 48, 56, 92], [[1, 6]]],
 [[10, 26, 34, 36, 45, 48], [[1, 7]]],
 [[10, 26, 34, 35, 36, 45, 48, 62], [[7]]],
 [[10, 18, 23, 44, 48, 60, 75, 92, 109, 114, 119], [[1, 2]]],
 [[5, 15, 23, 36, 42, 48, 52, 56, 62, 63, 92, 93, 105], [[6]]],
 [[10, 13, 18, 23, 27, 40, 44, 48, 53, 60, 62, 75, 83, 92, 109, 114, 119],
  [[2]]],
 [[1,
   6,
   8,
   9,
   10,
   18,
   19,
   23,
   26,
   28,
   34,
   36,
   38,
   44,
   45,

In [54]:
# Two user-activity variants of the raw ratings: the first call keeps users with
# at least 50 ratings, the second keeps users with at most 800 ratings.
sparse_df = sparseChanger(mlens_df,50)
dense_df = sparseChanger(mlens_df,800)

Original Shape: (1000209, 3)
After Trim Shape: (943471, 3)
Original Shape: (1000209, 3)
After Trim Shape: (887764, 3)


In [62]:
# Build a User x Item rating matrix for each variant and keep only the first
# 1500 user rows of each.
sparse_pivot = sparse_df.pivot_table(index='User', columns='Item', values='Rating').iloc[:1500]
dense_pivot = dense_df.pivot_table(index='User', columns='Item', values='Rating').iloc[:1500]

In [64]:
# Check the (rows, columns) size of the truncated sparse matrix.
sparse_pivot.shape

(1500, 3689)

In [41]:
class GeneticEvolution:
    """Holds an initial population and a target user for an evolutionary search.

    Only the selection step exists so far, and it simply takes the first entry
    of the initial population as the fittest.
    """

    def __init__(self, ip, t_user):
        """Store the initial population ``ip`` and the target user ``t_user``."""
        self.init_pop, self.target_user = ip, t_user

    def selection(self):
        """Record the first member of the initial population as ``self.fittest``."""
        candidates = self.init_pop
        self.fittest = candidates[0]

def Evolution(init_pop,target_user):
    """Placeholder for the evolution driver; not implemented yet.

    The definition previously had no body, which is a syntax error and kept the
    whole cell from running. It now fails loudly when called instead.

    Args:
        init_pop: presumably the initial population, as in GeneticEvolution - TODO confirm.
        target_user: presumably the user the search is run for - TODO confirm.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError('Evolution() has not been implemented yet')

int

In [60]:
# Python port of the Java-style genetic-algorithm sketch that used to be pasted
# in this cell. Definitions come first so the driver at the bottom can run.
# Requires the standard-library `random` module (imported in the first cell).


#Individual class
class Individual:
    """One candidate solution: a list of 0/1 genes plus its cached fitness."""

    # Default chromosome length for randomly generated individuals.
    geneLength = 5

    def __init__(self, rng=None, genes=None):
        """Create an individual.

        Args:
            rng: object exposing ``randrange`` (e.g. ``random.Random``); the
                ``random`` module itself is used when omitted.
            genes: optional explicit gene list (copied). When omitted,
                ``Individual.geneLength`` random bits are drawn from ``rng``.
        """
        source = random if rng is None else rng
        if genes is None:
            #Set genes randomly for each individual
            self.genes = [source.randrange(2) for _ in range(Individual.geneLength)]
        else:
            self.genes = list(genes)
        self.geneLength = len(self.genes)
        self.fitness = 0

    #Calculate fitness
    def calcFitness(self):
        """Set ``fitness`` to the number of genes equal to 1."""
        self.fitness = sum(1 for gene in self.genes if gene == 1)

    def copy(self):
        """Return an independent individual with the same genes and fitness."""
        twin = Individual(genes=self.genes)
        twin.fitness = self.fitness
        return twin


#Population class
class Population:
    """A list of individuals plus the fitness value of the current best one."""

    def __init__(self):
        self.popSize = 10
        self.individuals = []
        self.fittest = 0

    #Initialize population
    def initializePopulation(self, size, rng=None):
        """Replace the population with ``size`` random individuals."""
        self.popSize = size
        self.individuals = [Individual(rng) for _ in range(size)]

    def _fittestIndex(self):
        """Index of the best individual; ties resolve to the last one."""
        best = 0
        for idx, candidate in enumerate(self.individuals):
            if candidate.fitness >= self.individuals[best].fitness:
                best = idx
        return best

    #Get the fittest individual
    def getFittest(self):
        """Return the best individual and cache its fitness in ``self.fittest``.

        Raises ``IndexError`` if the population is empty.
        """
        best = self.individuals[self._fittestIndex()]
        self.fittest = best.fitness
        return best

    #Get the second most fittest individual
    def getSecondFittest(self):
        """Return the best individual other than the one getFittest() picks.

        The earlier scan could hand back the fittest individual itself (for
        example when it sat at index 0), which turned crossover into a no-op.
        With a single-member population that member is returned.
        """
        top = self._fittestIndex()
        runner_up = None
        for idx, candidate in enumerate(self.individuals):
            if idx == top:
                continue
            if runner_up is None or candidate.fitness > self.individuals[runner_up].fitness:
                runner_up = idx
        return self.individuals[top if runner_up is None else runner_up]

    #Get index of least fittest individual
    def getLeastFittestIndex(self):
        """Index of the worst individual; ties resolve to the last one."""
        worst = 0
        for idx, candidate in enumerate(self.individuals):
            if candidate.fitness <= self.individuals[worst].fitness:
                worst = idx
        return worst

    #Calculate fitness of each individual
    def calculateFitness(self):
        """Recompute every individual's fitness and refresh ``self.fittest``."""
        for member in self.individuals:
            member.calcFitness()
        self.getFittest()


class GeneticDemo:
    """Driver that evolves a Population until one individual is all ones."""

    def __init__(self, popSize=10, seed=None):
        """Create and score a random population.

        Args:
            popSize: number of individuals.
            seed: optional seed for a private ``random.Random`` so runs can be
                reproduced.
        """
        self.rng = random.Random(seed)
        self.population = Population()
        self.population.initializePopulation(popSize, self.rng)
        self.population.calculateFitness()
        self.fittest = None
        self.secondFittest = None
        self.generationCount = 0

    #Selection
    def selection(self):
        """Pick the two parents for this generation."""
        #Select the most fittest individual
        self.fittest = self.population.getFittest()
        #Select the second most fittest individual
        self.secondFittest = self.population.getSecondFittest()

    #Crossover
    def crossover(self):
        """Swap the genes before a random crossover point between the parents."""
        #Select a random crossover point
        crossOverPoint = self.rng.randrange(len(self.fittest.genes))
        first, second = self.fittest.genes, self.secondFittest.genes
        #Swap values among parents (the right-hand slices are copies)
        first[:crossOverPoint], second[:crossOverPoint] = (
            second[:crossOverPoint], first[:crossOverPoint])

    #Mutation
    def mutation(self):
        """Flip one randomly chosen gene in each parent."""
        for parent in (self.fittest, self.secondFittest):
            #Select a random mutation point
            mutationPoint = self.rng.randrange(len(parent.genes))
            #Flip values at the mutation point
            parent.genes[mutationPoint] = 1 if parent.genes[mutationPoint] == 0 else 0

    #Get fittest offspring
    def getFittestOffspring(self):
        """Return the parent with the higher fitness (second parent on ties)."""
        if self.fittest.fitness > self.secondFittest.fitness:
            return self.fittest
        return self.secondFittest

    #Replace least fittest individual from most fittest offspring
    def addFittestOffspring(self):
        """Overwrite the worst individual with a copy of the better offspring."""
        #Update fitness values of offspring
        self.fittest.calcFitness()
        self.secondFittest.calcFitness()
        #Get index of least fit individual
        leastFittestIndex = self.population.getLeastFittestIndex()
        # A copy is stored so two population slots never share one object.
        self.population.individuals[leastFittestIndex] = self.getFittestOffspring().copy()

    def run(self, maxGenerations=None, verbose=True):
        """Evolve until an individual has every gene set, then return it.

        Args:
            maxGenerations: optional safety cap; ``None`` means no cap.
            verbose: print per-generation progress and the final result.

        Returns:
            The fittest individual at the time the loop stopped.
        """
        target = self.population.individuals[0].geneLength
        while self.population.fittest < target:
            if maxGenerations is not None and self.generationCount >= maxGenerations:
                break
            self.generationCount += 1

            #Do selection
            self.selection()
            #Do crossover
            self.crossover()
            #Do mutation under a random probability (5 in 7)
            if self.rng.randrange(7) < 5:
                self.mutation()
            #Add fittest offspring to population
            self.addFittestOffspring()
            #Calculate new fitness value
            self.population.calculateFitness()

            if verbose:
                print("Generation: {} Fittest: {}".format(
                    self.generationCount, self.population.fittest))

        best = self.population.getFittest()
        if verbose:
            if best.fitness >= target:
                print("\nSolution found in generation {}".format(self.generationCount))
            else:
                print("\nNo solution found after {} generations".format(self.generationCount))
            print("Fitness: {}".format(best.fitness))
            print("Genes: " + "".join(str(gene) for gene in best.genes))
        return best


# True when run as a notebook cell or script; skipped when imported as a module.
if __name__ == "__main__":
    demo = GeneticDemo()
    demo.run()
     

 

(4835, 2976)