In [1]:
import duckdb
from dotenv import load_dotenv
import os

In [2]:
# Pull the Postgres credentials from .env / the process environment.
load_dotenv()

# Fail early with a readable message: os.getenv() returns None for an unset
# variable, which an f-string would render as the literal text "None" inside
# the connection string.
_required = ("DB_NAME", "DB_USER", "DB_HOST", "DB_PASSWORD", "DB_PORT")
_missing = [key for key in _required if os.getenv(key) is None]
if _missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(_missing)}")

# Double any single quote so a value cannot terminate the SQL string literal
# that wraps the connection string below.
_pg = {key: os.environ[key].replace("'", "''") for key in _required}

# In-memory DuckDB database; the Postgres database is attached under the
# alias `supabase`, restricted to its `public` schema.
con = duckdb.connect(":memory:")
# INSTALL before LOAD so the cell also works on a machine where the postgres
# extension has not been downloaded yet (it does nothing if already installed).
con.sql("install postgres;")
con.sql("load postgres;")
con.sql(
    f"ATTACH 'dbname={_pg['DB_NAME']} user={_pg['DB_USER']} host={_pg['DB_HOST']} password={_pg['DB_PASSWORD']} port={_pg['DB_PORT']}' AS supabase (TYPE postgres, SCHEMA 'public');"
)

In [3]:
# Materialise the whole `anime` table from the attached `supabase` database
# as a pandas DataFrame.
anime_query = "select * from supabase.anime;"
anime = con.sql(anime_query).df()

In [4]:
# Show the loaded frame as the cell output (pandas truncates long frames
# in the rendered view).
anime

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
7581,29633,Kiseki,Dementia,Movie,1,5.27,130
7582,19125,Mogura no Adventure,"Adventure, Kids",Special,1,5.27,213
7583,9506,Prism Magical: Prism Generations!,"Comedy, Ecchi, Magic",OVA,1,5.27,1733
7584,2262,Sin: The Movie,"Action, Horror, Police, Sci-Fi, Shounen, Super...",OVA,1,5.27,3117


In [5]:
# Inspect the raw genre column: each row holds one comma-separated string
# of genre labels.
anime["genre"]

0                    Drama, Romance, School, Supernatural
1       Action, Adventure, Drama, Fantasy, Magic, Mili...
2       Action, Comedy, Historical, Parody, Samurai, S...
3                                        Sci-Fi, Thriller
4       Action, Comedy, Historical, Parody, Samurai, S...
                              ...                        
7581                                             Dementia
7582                                      Adventure, Kids
7583                                 Comedy, Ecchi, Magic
7584    Action, Horror, Police, Sci-Fi, Shounen, Super...
7585                                Comedy, Magic, Sci-Fi
Name: genre, Length: 7586, dtype: object

In [6]:
# Normalise the genre strings in place: trim whitespace at both ends of each
# string, then lowercase. Both steps are idempotent, so re-running is safe.
# Note that strip() only touches the ends of the full string, not the spaces
# that follow each comma.
anime["genre"] = anime["genre"].str.strip().str.lower()
anime["genre"]

0                    drama, romance, school, supernatural
1       action, adventure, drama, fantasy, magic, mili...
2       action, comedy, historical, parody, samurai, s...
3                                        sci-fi, thriller
4       action, comedy, historical, parody, samurai, s...
                              ...                        
7581                                             dementia
7582                                      adventure, kids
7583                                 comedy, ecchi, magic
7584    action, horror, police, sci-fi, shounen, super...
7585                                comedy, magic, sci-fi
Name: genre, Length: 7586, dtype: object

In [7]:
# Preview: split each comma-separated genre string into a list and emit one
# row per list element; the original row index is repeated for each element.
# Series.explode() takes only `ignore_index`; the stray "" that used to be
# passed was interpreted as that flag and only worked because it is falsy.
anime["genre"].str.split(",").explode()

0               drama
0             romance
0              school
0        supernatural
1              action
            ...      
7584          shounen
7584     supernatural
7585           comedy
7585            magic
7585           sci-fi
Name: genre, Length: 26684, dtype: object

In [8]:
# One row per (anime, genre) pair: split the comma-separated string and
# explode the resulting lists. The original row index is kept, so it repeats
# once per genre of the same anime.
# Series.explode() takes only `ignore_index`; the stray "" that used to be
# passed was interpreted as that flag and only worked because it is falsy.
genres = anime["genre"].str.split(",").explode()
genres

0               drama
0             romance
0              school
0        supernatural
1              action
            ...      
7584          shounen
7584     supernatural
7585           comedy
7585            magic
7585           sci-fi
Name: genre, Length: 26684, dtype: object

In [9]:
# Print a summary (index range, non-null count, dtype, memory) of the
# exploded genre Series.
genres.info()

<class 'pandas.core.series.Series'>
Index: 26684 entries, 0 to 7585
Series name: genre
Non-Null Count  Dtype 
--------------  ----- 
26684 non-null  object
dtypes: object(1)
memory usage: 416.9+ KB


In [11]:
# Promote the exploded genre Series to a one-column DataFrame.
# to_frame() is a Series method, so re-running the unguarded cell after
# `genres` had already been converted raised AttributeError. The ndim check
# (1 for a Series, 2 for a DataFrame) makes the cell safe to re-run.
if genres.ndim == 1:
    genres = genres.to_frame()
genres

Unnamed: 0,genre
0,drama
0,romance
0,school
0,supernatural
1,action
...,...
7584,shounen
7584,supernatural
7585,comedy
7585,magic


In [12]:
# Clean every exploded label: splitting on "," leaves the space that followed
# each comma, so trim whitespace and lowercase each individual genre.
genres["genre"] = genres["genre"].str.strip().str.lower()

In [13]:
# Show the cleaned one-column genre frame as the cell output.
genres

Unnamed: 0,genre
0,drama
0,romance
0,school
0,supernatural
1,action
...,...
7584,shounen
7584,supernatural
7585,comedy
7585,magic


In [14]:
# Print a summary (index range, column dtypes, non-null counts, memory) of
# the genre DataFrame.
genres.info()

<class 'pandas.core.frame.DataFrame'>
Index: 26684 entries, 0 to 7585
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   genre   26684 non-null  object
dtypes: object(1)
memory usage: 416.9+ KB


In [15]:
# Number of rows per genre label, as a two-column frame (`genre`, `count`)
# with a fresh 0..n-1 index, ordered by genre label.
# NOTE(review): the rendered output includes a blank genre label with a
# count of 10 - presumably anime with an empty genre string; confirm whether
# those rows should be dropped before counting.
genre_sizes = genres.groupby("genre").size()
genres_gb = genre_sizes.reset_index(name="count")
genres_gb

Unnamed: 0,genre,count
0,,10
1,action,2327
2,adventure,1804
3,cars,48
4,comedy,3383
5,dementia,67
6,demons,194
7,drama,1512
8,ecchi,571
9,fantasy,1647


In [16]:
# Print a summary (row count, column dtypes, non-null counts, memory) of the
# per-genre count table.
genres_gb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   genre   41 non-null     object
 1   count   41 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 788.0+ bytes


In [None]:
# List the distinct genre labels present in the per-genre count table.
# NOTE(review): this cell has no execution count in the saved notebook, so
# it has not been run and has no recorded output.
genres_gb["genre"].unique()

In [18]:
# Number of anime per release type, as a two-column frame (`type`, `count`)
# with a fresh 0..n-1 index, ordered by type label.
type_sizes = anime.groupby("type").size()
types = type_sizes.reset_index(name="count")
types

Unnamed: 0,type,count
0,Movie,1296
1,Music,295
2,ONA,324
3,OVA,1438
4,Special,1294
5,TV,2939
