In [70]:
import pandas as pd
import networkx as nx
from community import community_louvain
from tqdm import tqdm

## Getting Genre/Partition dictionaries

In [19]:
## Movie -> Genre lookup (movie ID as key, genre label as value)
# Rebuilt here with code taken from Group Project MWRP.ipynb, because importing
# the dictionary from that notebook takes a long time.

# Read the dataset and keep only the two columns the lookup needs.
data_for_clusters = pd.read_csv("10%Dataset.csv")[["Movie", "MovieGenre"]]

# Collapse repeated (Movie, MovieGenre) rows and renumber the index from 0.
dataclusters = data_for_clusters.drop_duplicates().reset_index(drop=True)

keys, values = dataclusters["Movie"], dataclusters["MovieGenre"]

# If a movie appears with more than one genre, the pair seen last overwrites
# the earlier ones when the dict is built.
dict_mvs_genre = dict(zip(keys, values))

print(dict_mvs_genre)



{'B000NDFLWG': 'Documentary', 'B008FPU7AA': 'Horror', '1562229567': 'Documentary', '1888617047': 'Musicals', '6305508569': 'Action', '6304474415': 'Documentary', 'B0006FFRD4': 'Anime & Manga', 'B003BUAP10': 'Documentary', 'B000BB1NFO': 'Documentary', 'B00020HBN2': 'Anime & Manga', 'B000GAKJXM': 'Documentary', 'B00020HBNW': 'Anime & Manga', 'B00020HBNC': 'Anime & Manga', 'B0012EM5GK': 'Anime & Manga', 'B0002V7TJM': 'Documentary', 'B0002V7TJC': 'Documentary', 'B00004R7CH': 'Musicals', 'B0002V7TJW': 'Documentary', 'B00000IC80': 'Musicals', '6300246299': 'Horror', 'B00000IC8F': 'Musicals', 'B0014567LQ': 'Horror', 'B001DWNUBU': 'Documentary', 'B008R52L4I': 'Horror', 'B000H5U5IU': 'Horror', 'B000VZC4XA': 'Horror', 'B0071BY2YQ': 'Documentary', '0764004492': 'Horror', 'B006HNRSFE': 'Documentary', 'B0007M21Y4': 'Musicals', '6300266850': 'Horror', 'B000KGGJ04': 'Action', 'B000A9QK8M': 'Action', 'B003ZZ7TT2': 'Musicals', 'B000FP2PE4': 'Documentary', '6304462689': 'Documentary', 'B004J0DQS0': 'Doc

In [54]:
# Number of distinct movie IDs that have a genre entry in the lookup.
print(len(dict_mvs_genre))

21788


In [20]:
# Load the network from a weighted edge-list file into the graph G.
# NOTE(review): no delimiter is passed, so each line is split using the
# function's default separator — confirm that matches the file, given its
# .csv extension.
G = nx.read_weighted_edgelist('10%Network.csv')

In [67]:
# Use the Louvain algorithm to find partitions (communities) of G.
# Running this takes ~3 mins.
# Louvain visits nodes in a random order, so without a fixed seed the community
# labels, the number of communities and every figure derived from them can
# differ between runs. Pinning random_state makes a fresh "Restart & Run All"
# reproduce the same partition.
division = community_louvain.best_partition(G, random_state=42)
print(division)

{'B000NDFLWG': 5, '6305804427': 5, 'B0001IXT36': 5, 'B000EOTEIQ': 5, 'B000ARFPV2': 5, 'B00003ETLD': 5, 'B000B0WO4O': 5, 'B000NJM6EG': 5, 'B000PAAIQ6': 5, 'B000BQ5IWS': 5, 'B000NA2780': 5, 'B0000B1OG1': 5, 'B000FZEQVE': 5, 'B000069503': 5, 'B0001I555G': 5, 'B0007Y08JM': 5, 'B000LV6ONA': 5, 'B004MYFVPE': 5, 'B000GB75CE': 5, 'B0002ZDWGC': 5, 'B000IHY9ES': 5, 'B0009IXRMG': 5, 'B00005MKJ2': 5, 'B000KJU1I2': 5, 'B0001I5556': 5, '6305472564': 5, 'B000621NRQ': 5, 'B00009PBHJ': 1, 'B000MQ4WXS': 2, 'B000CDSS6S': 5, 'B0000CBXZW': 5, 'B000E8NRPI': 5, 'B000GTJSGC': 5, 'B000EOTEAY': 5, 'B00008G43J': 5, 'B000CCCZYK': 5, 'B00004TSYQ': 5, 'B0001I555Q': 5, 'B0000CBXZT': 5, 'B00004T3B2': 5, 'B000HWXOT0': 5, 'B00005OCQQ': 5, 'B00009PBJ5': 1, 'B0001GH6SC': 5, 'B00005MKIX': 5, 'B000ILYYZ8': 5, 'B0002IQNEC': 5, 'B00067BC8Q': 5, 'B001QZZFTO': 5, 'B000F9RM2U': 5, 'B0000C2IXK': 5, 'B0000A02UH': 5, 'B000087F3O': 5, 'B00009MECT': 5, 'B00009MECP': 5, 'B000MAFXQO': 3, 'B000EOTEMC': 5, 'B00004T3AY': 5, 'B00006954J':

In [68]:
# Number of distinct community labels assigned by the partition.
print(len(set(division.values())))

868


## Ratio of partitions corresponding to each genre

In [80]:
# Build one set of movie IDs per genre from the Movie -> Genre lookup.
# Genre labels used: 'Action', 'Horror', 'Documentary', 'Musicals', 'Anime & Manga'
Docs = {movie for movie, label in dict_mvs_genre.items() if label == 'Documentary'}
Act = {movie for movie, label in dict_mvs_genre.items() if label == 'Action'}
Hor = {movie for movie, label in dict_mvs_genre.items() if label == 'Horror'}
Mus = {movie for movie, label in dict_mvs_genre.items() if label == 'Musicals'}
AnM = {movie for movie, label in dict_mvs_genre.items() if label == 'Anime & Manga'}

def get_partitions(som, partition=None):
    ''' Return the set of community labels that the films in `som` belong to.

    Parameters
    ----------
    som : iterable
        Film identifiers to look up.
    partition : dict, optional
        Mapping of film identifier -> community label. When omitted, the
        notebook-level `division` is used.

    Returns
    -------
    set
        The label of every community that contains at least one film from
        `som`. Films that are missing from the mapping are skipped, so the
        result never contains a spurious ``None`` entry.
    '''
    if partition is None:
        partition = division
    # Membership test instead of .get(): a film absent from the mapping has no
    # community, and must not add None to the result.
    return {partition[film] for film in som if film in partition}

In [82]:
# Quick check of get_partitions: show the community labels reached by the
# films in the Documentary set.
print(get_partitions(Docs))

{0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 30, 31, 33, 34, 36, 37, 42, 43, 45, 46, 47, 48, 50, 51, 52, 53, 54, 56, 58, 60, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 78, 80, 84, 85, 86, 87, 88, 89, 91, 92, 93, 95, 97, 98, 99, 100, 102, 103, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 117, 119, 120, 121, 125, 126, 127, 128, 129, 130, 133, 134, 135, 137, 141, 142, 143, 144, 145, 146, 148, 149, 151, 152, 153, 154, 155, 158, 161, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 179, 180, 181, 182, 183, 184, 185, 186, 189, 190, 191, 193, 194, 197, 198, 200, 201, 202, 203, 204, 205, 207, 208, 209, 210, 212, 217, 218, 219, 221, 222, 223, 224, 225, 227, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 244, 245, 247, 248, 249, 250, 251, 252, 253, 255, 256, 257, 258, 259, 263, 265, 267, 270, 271, 272, 273, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 294, 295, 296,

In [93]:
def percentage_in_genre(genre):
    ''' Return the mean share of `genre` films across the partitions the genre touches.

    For every partition (community) in `division` that contains at least one
    film from `genre`, the fraction of that partition's members that belong to
    `genre` is computed. The result is the unweighted mean of those fractions,
    so a partition with two films counts exactly as much as a very large one.

    Parameters
    ----------
    genre : set
        Film identifiers that make up the genre.

    Returns
    -------
    float
        Mean fraction between 0 and 1, or NaN when none of the genre's films
        appears in `division` (there is then no partition to average over).
    '''
    # Group the films of every partition once, instead of rescanning the whole
    # of `division` several times for each partition inside the loop.
    members_by_part = {}
    for film, part in division.items():
        members_by_part.setdefault(part, set()).add(film)

    share_total = 0
    parts_counted = 0
    for part in tqdm(get_partitions(genre)):
        members = members_by_part.get(part)
        # get_partitions can yield a label with no members in `division`
        # (e.g. None for films that are not in the graph); skip those.
        if not members:
            continue
        parts_counted += 1
        # Films in this partition that are also in the genre / partition size.
        share_total += len(members.intersection(genre)) / len(members)

    if parts_counted == 0:
        # Nothing to average: avoid the ZeroDivisionError and signal "undefined".
        return float('nan')
    return share_total / parts_counted

In [94]:
# Evaluate all five genres first (one progress bar each, in this order),
# then report the results together.
doc_ratio, act_ratio, hor_ratio, mus_ratio, anm_ratio = (
    percentage_in_genre(films) for films in (Docs, Act, Hor, Mus, AnM)
)

# One line per genre; sep="\n" gives the same output as five separate prints.
print(
    f"{doc_ratio} of Documentaries are reviewed by the same accounts",
    f"{act_ratio} of Action films are reviewed by the same accounts",
    f"{hor_ratio} of Horror films are reviewed by the same accounts",
    f"{mus_ratio} of Musicals are reviewed by the same accounts",
    f"{anm_ratio} of Anime and Manga films are reviewed by the same accounts",
    sep="\n",
)

100%|████████████████████████████████████████████████████████████████████████████████| 585/585 [00:05<00:00, 99.05it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 91.89it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 139/139 [00:01<00:00, 109.85it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 101/101 [00:00<00:00, 110.60it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 113/113 [00:01<00:00, 89.63it/s]

0.9558936196728721 of Documentaries are reviewed by the same accounts
0.5920973636033136 of Action films are reviewed by the same accounts
0.861819145669971 of Horror films are reviewed by the same accounts
0.8119132216258467 of Musicals are reviewed by the same accounts
0.8202932226598451 of Anime and Manga films are reviewed by the same accounts



