In [31]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
ratings = pd.read_csv('/home/vaserkn/VSCode/Anime_rec_sys/ratings.csv')

In [3]:
anime = pd.read_csv('/home/vaserkn/VSCode/Anime_rec_sys/anime.csv')
anime = anime.drop(columns=['Unnamed: 0'])

# Рекомендации на основе контента

## Рекомендации на основе жанров

In [4]:
anime['Genres'] = anime['Genres'].astype(str)
genres = anime['Genres'].apply(lambda r: str(r.split(',')))

In [5]:
genres.iloc[0]

"['Action', ' Adventure', ' Comedy', ' Drama', ' Sci-Fi', ' Space']"

In [6]:
tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=0.0)
tfidf_matrix = tfidf.fit_transform(genres)

In [7]:
tfidf_matrix.shape

(17562, 973)

In [10]:
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [11]:
indices = pd.Series(anime.index, index=anime['Name'])

In [12]:
def get_recommendation(title):
    idx = indices[title]  # Находим индекс в данных по названию
    sim_scores = list(enumerate(cosine_sim[idx]))  # Создаём лист с индексам и коэфф. схожести между заданным и остальными аниме
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)  # Сортируем sim_scores
    sim_scores = sim_scores[1:11]  # Выбираем 10 самых похожих
    anime_indices = [i[0] for i in sim_scores]  # Сохраняем индексы
    return anime.iloc[anime_indices].drop(columns=['MAL_ID']).reset_index(drop=True) # Возвращаем фильмы соответствующие индексам 

In [13]:
get_recommendation('Overlord')

Unnamed: 0,Name,Score,Genres,Type,Episodes,Aired,Duration,Rating,Popularity,Members,Favorites
0,Ku Pao Ying Xiong,5.69,"Action, Adventure, Fantasy, Game, Magic",TV,26.0,"Sep 2, 2017 to Feb 17, 2018",14 min. per ep.,,7613.0,3271.0,6.0
1,Ys IV: The Dawn of Ys,5.79,"Action, Adventure, Fantasy, Game",Special,1.0,1993,3 min.,PG-13 - Teens 13 or older,11649.0,605.0,0.0
2,Sword Art Online II: Debriefing,6.56,"Action, Adventure, Fantasy, Game",Special,1.0,"Oct 11, 2014",24 min.,PG-13 - Teens 13 or older,1547.0,89257.0,363.0
3,Valhait Rising: Kandou e.,5.61,"Action, Adventure, Fantasy, Game",ONA,1.0,"Jul 6, 2016",2 min.,G - All Ages,14836.0,206.0,0.0
4,The Movie: Sekai no Mukou ni,7.14,"Action, Adventure, Fantasy, Game, Magic, Myste...",Movie,1.0,"Jan 21, 2012",1 hr. 50 min.,PG-13 - Teens 13 or older,4028.0,16190.0,26.0
5,Rara Maji,5.91,"Fantasy, Game, Magic, Music",ONA,1.0,"Jan 27, 2017",1 min.,G - All Ages,11984.0,531.0,0.0
6,Mashiro Witch,5.5,"Game, Magic",ONA,1.0,"Mar 14, 2017",2 min.,PG-13 - Teens 13 or older,14375.0,235.0,0.0
7,Juuni Kokuki,8.06,"Action, Adventure, Fantasy, Magic, Supernatural",TV,45.0,"Apr 9, 2002 to Aug 30, 2003",25 min. per ep.,PG-13 - Teens 13 or older,1266.0,113627.0,2261.0
8,Hajimari no Boukensha-tachi: Legend of Crystania,6.06,"Action, Adventure, Fantasy, Magic, Supernatural",Movie,1.0,"Jul 29, 1995",1 hr. 20 min.,PG-13 - Teens 13 or older,6829.0,4391.0,6.0
9,Legend of Crystania,6.02,"Action, Adventure, Fantasy, Magic, Supernatural",OVA,3.0,"Nov 21, 1996 to Apr 23, 1997",45 min. per ep.,R - 17+ (violence & profanity),6909.0,4242.0,6.0


#### Рекомендации на основе всех имеющихся текстовых данных

In [14]:
soup = anime['Genres'] + anime['Type'] + anime['Rating']
soup = soup.astype(str)

In [15]:
soup.iloc[0]

'Action, Adventure, Comedy, Drama, Sci-Fi, SpaceTVR - 17+ (violence & profanity)'

In [21]:
soup = soup.apply(lambda x: str.lower(x.replace(' ', '')))

In [19]:
soup = soup.apply(lambda x: ' '.join(x))

In [22]:
soup.iloc[0]

'action,adventure,comedy,drama,sci-fi,spacetvr-17+(violence&profanity)'

In [23]:
count = CountVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0.0, stop_words='english')
count_matrix = count.fit_transform(soup)

In [24]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [25]:
get_recommendation('Overlord')

Unnamed: 0,Name,Score,Genres,Type,Episodes,Aired,Duration,Rating,Popularity,Members,Favorites
0,Overlord III,7.95,"Action, Magic, Fantasy, Game, Supernatural",TV,13.0,"Jul 10, 2018 to Oct 2, 2018",23 min. per ep.,R - 17+ (violence & profanity),205.0,549484.0,4115.0
1,Fate/stay night: Unlimited Blade Works,8.22,"Action, Fantasy, Magic, Supernatural",TV,12.0,"Oct 12, 2014 to Dec 28, 2014",28 min. per ep.,R - 17+ (violence & profanity),119.0,742718.0,12756.0
2,Fate/stay night: Unlimited Blade Works 2nd Season,8.33,"Action, Fantasy, Magic, Supernatural",TV,13.0,"Apr 5, 2015 to Jun 28, 2015",23 min. per ep.,R - 17+ (violence & profanity),175.0,605173.0,7133.0
3,Dororo to Hyakkimaru,7.17,"Action, Adventure, Supernatural",TV,26.0,"Apr 6, 1969 to Sep 28, 1969",26 min. per ep.,R - 17+ (violence & profanity),4568.0,11954.0,76.0
4,Legend of Crystania,6.02,"Action, Adventure, Fantasy, Magic, Supernatural",OVA,3.0,"Nov 21, 1996 to Apr 23, 1997",45 min. per ep.,R - 17+ (violence & profanity),6909.0,4242.0,6.0
5,Densetsu no Yuusha no Densetsu: Iris Report,6.55,"Action, Adventure, Fantasy, Magic, Shounen",Special,1.0,"Oct 14, 2010",23 min.,R - 17+ (violence & profanity),3895.0,17411.0,22.0
6,Garo: Honoo no Kokuin,7.41,"Action, Demons, Fantasy, Magic, Supernatural",TV,24.0,"Oct 4, 2014 to Mar 28, 2015",24 min. per ep.,R - 17+ (violence & profanity),1222.0,120332.0,416.0
7,Blade & Soul,6.09,"Action, Adventure, Fantasy, Martial Arts",TV,13.0,"Apr 4, 2014 to Jun 27, 2014",24 min. per ep.,R - 17+ (violence & profanity),1299.0,109933.0,137.0
8,"Gate: Jieitai Kanochi nite, Kaku Tatakaeri",7.74,"Action, Adventure, Fantasy, Military",TV,12.0,"Jul 4, 2015 to Sep 19, 2015",23 min. per ep.,R - 17+ (violence & profanity),177.0,601037.0,5306.0
9,Nejimaki Seirei Senki: Tenkyou no Alderamin,7.73,"Action, Adventure, Fantasy, Military",TV,13.0,"Jul 9, 2016 to Oct 1, 2016",23 min. per ep.,R - 17+ (violence & profanity),589.0,245963.0,1225.0


#### Рекомендации на основе текстовых данных с учётом рейтинга и популярности

In [26]:
def filter_anime(prm, anime):
    col = prm[0]
    th = prm[1]
    if th == 'High':
        anime = anime.loc[anime[col] >= anime[col].quantile(0.75)]  # Находим аниме с значением >= 75% всех значений  в колонке
    elif th == 'Medium':
        anime = anime.loc[anime[col] >= anime[col].quantile(0.5)]  # Находим аниме с значением >= 50% всех значений  в колонке
    elif th == 'Low':
        anime = anime.loc[anime[col] >= anime[col].quantile(0.25)]  # Находим аниме с значением >= 25% всех значений  в колонке
    return anime  # Возвращаем отфильтрованный список


In [29]:
def get_filtered_rec(title, params=[('Score', 'High'), ('Popularity', 'Medium')]):
    idx = indices[title]  # Находим индекс в данных по названию
    sim_scores = list(enumerate(cosine_sim[idx]))  # Создаём лист с индексам и коэфф. схожести между заданным и остальными аниме
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)  # Сортируем sim_scores
    sim_scores = sim_scores[1:100]  # Выбираем 10 самых похожих
    anime_indices = [i[0] for i in sim_scores]  # Сохраняем индексы

    y = anime.iloc[anime_indices]  # Находим аниме по индексу

    for prm in params:
        y = filter_anime(prm, y)
    return y.drop(columns='MAL_ID').reset_index(drop=True)

In [30]:
get_filtered_rec('Death Note')

Unnamed: 0,Name,Score,Genres,Type,Episodes,Aired,Duration,Rating,Popularity,Members,Favorites
0,Death Note: Rewrite,7.71,"Mystery, Police, Psychological, Supernatural, ...",Special,2.0,"Aug 31, 2007 to Aug 22, 2008",1 hr. 52 min. per ep.,R - 17+ (violence & profanity),965.0,154021.0,658.0
1,Kindaichi Shounen no Jikenbo,7.98,"Mystery, Shounen",TV,148.0,"Apr 7, 1997 to Sep 11, 2000",24 min. per ep.,R - 17+ (violence & profanity),3514.0,21714.0,196.0
2,Id:Invaded,7.9,"Mystery, Police, Psychological, Sci-Fi",TV,13.0,"Jan 6, 2020 to Mar 23, 2020",24 min. per ep.,R - 17+ (violence & profanity),704.0,211234.0,1833.0
3,Higurashi no Naku Koro ni Kai,8.23,"Mystery, Psychological, Supernatural, Thriller",TV,24.0,"Jul 6, 2007 to Dec 18, 2007",24 min. per ep.,R - 17+ (violence & profanity),344.0,385728.0,6668.0
4,Yuukoku no Moriarty,7.97,"Mystery, Historical, Psychological, Shounen",TV,11.0,"Oct 11, 2020 to Dec 20, 2020",23 min. per ep.,R - 17+ (violence & profanity),1089.0,135996.0,1364.0
5,Shiki Specials,7.81,"Mystery, Horror, Supernatural, Thriller, Vampire",Special,2.0,"May 25, 2011 to Jun 22, 2011",23 min. per ep.,R - 17+ (violence & profanity),2012.0,59835.0,92.0
6,Beastars 2nd Season,8.06,"Slice of Life, Psychological, Drama, Shounen",TV,,"Jan 7, 2021 to ?",22 min.,R - 17+ (violence & profanity),851.0,176946.0,883.0
7,Koukaku Kidoutai: Stand Alone Complex - The La...,8.1,"Action, Sci-Fi, Mystery, Police, Psychological...",Special,1.0,"Sep 23, 2005",2 hr. 40 min.,R - 17+ (violence & profanity),2751.0,34649.0,56.0
8,Jin-Rou,7.79,"Military, Police, Psychological, Drama, Romance",Movie,1.0,"Jun 3, 2000",1 hr. 42 min.,R - 17+ (violence & profanity),1181.0,124856.0,1359.0
9,Kuroshitsuji: Book of Circus,8.13,"Action, Mystery, Comedy, Historical, Demons, S...",TV,10.0,"Jul 11, 2014 to Sep 12, 2014",24 min. per ep.,R - 17+ (violence & profanity),482.0,295450.0,3512.0
