# AGNN RS model implementation
This file implements the Artificial Genetic Neural Network recommender system, using the [AGGN journal](journals/AGNN.pdf).

## 2. Collect dataset

The rating matrix has been generated using the [ratings_matrix.ipynb](./ratings_matrix.ipynb) script.

In [9]:
# 2: Collect [r]x*y dataset;
import pandas as pd

ratings_matrix = pd.read_csv('data/ratings_matrix.csv')
ratings_matrix = ratings_matrix.iloc[:,:1000] # Limiting dataset as it would take a week to run.
ratings_matrix

Unnamed: 0,userId,1,2,3,4,5,6,7,8,9,...,1291,1292,1293,1295,1296,1297,1298,1299,1300,1301
0,1,4.0,,4.0,,,4.0,,,,...,5.0,,,,,,5.0,,,
1,2,,,,,,,,,,...,,,,,,,,,,
2,3,,,,,,,,,,...,,,,,,,,,,
3,4,,,,,,,,,,...,4.0,,,,,,,,,
4,5,4.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,606,2.5,,,,,,2.5,,,...,4.0,,4.0,,4.5,,,3.5,3.5,
606,607,4.0,,,,,,,,,...,4.0,,,,,,,,,
607,608,2.5,2.0,2.0,,,,,,,...,4.0,,,,,,,,,
608,609,3.0,,,,,,,,,...,,,,,,,,,,


## 3. Calculate `sim(om_s, om_t)` using (1);

![Similarity function](images/eq_1.png)

In [10]:
# 3: Calculate sim(om_s, om_t) using (1);

def sim(om_s: pd.DataFrame, om_t: pd.DataFrame) -> float:
    """Generates a similarity value between two online movies."""
    
    #Merging the two movies so we can find common users who rated both
    temp_om = pd.concat([om_s, om_t], axis=1)
    temp_om.head()
    
    # Dropping rows with NaNs as this yields movies with common ratings
    common_om = temp_om.dropna()
    common_om.head()
    
    # Splitting the data again
    om_s = common_om.iloc[0]
    om_t = common_om.iloc[1]

    #Getting similarity
    similarity = sum(om_s * om_t) / sum(om_s + om_t)
    return similarity

In [11]:
# Dropping the `userId` column, as it is not necessary for this step.
ratings_matrix.drop('userId', axis=1, inplace=True)
ratings_matrix.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,1291,1292,1293,1295,1296,1297,1298,1299,1300,1301
0,4.0,,4.0,,,4.0,,,,,...,5.0,,,,,,5.0,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,4.0,,,,,,,,,
4,4.0,,,,,,,,,,...,,,,,,,,,,


In [12]:
similarities_list = [] # Where the average similarities will be stored.

# Iterating over every movie
for om_s_id, om_s in ratings_matrix.iteritems():

    similarities = []
    # Iterating over movies to check similarity against
    for om_t_id, om_t in ratings_matrix.iteritems():

        # Making sure we don't check the similarity of a movie against itself
        if om_s_id == om_t_id:
            continue
        
        similarity = sim(om_s, om_t)
        similarities.append(similarity)
    
    # Calculating average similarity
    avg_similarity = sum(similarities) / ratings_matrix.columns
    
    # Appending ID and avg. sim. to list
    similarities_list.append((om_s_id, avg_similarity))
    
similarities_list = sorted(similarities_list, key=lambda tupl: tupl[1])
similarities_list

IndexError: single positional indexer is out-of-bounds