In [17]:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import plotly as py
import plotly.graph_objs as go
import cufflinks
import ipywidgets as widgets
from IPython.display import display

In [18]:
# Data cleaning
#
# Loads the two CSV exports, normalises column names and text casing, repairs
# HTML entities left in the titles/artist names, and drops incomplete rows.

# Creating dataframes
tags = pd.read_csv("tags.csv")
torrents = pd.read_csv("torrents.csv")

# Changing columns names (reassigning instead of inplace=True so the step
# reads as a plain transformation of the freshly loaded frames)
tags = tags.rename(columns={"index": "Index", "id": "ID", "tag": "Tag"})
torrents = torrents.rename(columns={
    "groupName": "Group Name",
    "totalSnatched": "Total Snatched",
    "artist": "Artist",
    "groupYear": "Group Year",
    "releaseType": "Release Type",
    "groupId": "Group ID",
    "id": "ID",
})

# Converting into title format
tags["Tag"] = tags["Tag"].str.title()
for _text_column in ("Group Name", "Artist", "Release Type"):
    torrents[_text_column] = torrents[_text_column].str.title()

# Replacing wrong characters
# regex=False is spelled out: "." is a regex wildcard, and the default value
# of `regex` differs between pandas versions, so a literal match must be
# requested explicitly to behave the same everywhere.
tags["Tag"] = tags["Tag"].str.replace(".", " ", regex=False)
# A trailing upper-case "S" (anchored with "$", hence a real regex) is lowered.
tags["Tag"] = tags["Tag"].str.replace("S$", "s", regex=True)

# The entity names are title-cased ("&Amp;", not "&amp;") because .str.title()
# has already been applied to these columns above. Order is preserved from the
# original sequence of replacements.
_HTML_ENTITY_FIXES = {
    "&#39;": "'",
    "&Amp;": "&",
    "&Quot;": "\"",
    "&Aacute;": "á",
}
for _entity_column in ("Group Name", "Artist"):
    for _entity, _character in _HTML_ENTITY_FIXES.items():
        torrents[_entity_column] = torrents[_entity_column].str.replace(
            _entity, _character, regex=False
        )

# Checking for NaNs in tags dataframe
is_NaN_tags = tags.isnull()
row_has_NaN_tags = is_NaN_tags.any(axis=1)
rows_with_NaN_tags = tags[row_has_NaN_tags]

# Checking for NaNs in torrents dataframe
is_NaN_torrents = torrents.isnull()
row_has_NaN_torrents = is_NaN_torrents.any(axis=1)
rows_with_NaN_torrents = torrents[row_has_NaN_torrents]

# Removing NaNs and reset indexes
tags = tags.dropna().reset_index(drop=True)
torrents = torrents.dropna().reset_index(drop=True)

# Checking for duplicates (last expression of the cell, so it is displayed)
torrents[torrents.duplicated(subset="ID")]

Unnamed: 0,Group Name,Total Snatched,Artist,Group Year,Release Type,Group ID,ID


In [19]:
def main():
    """Print the menu prompt, then render the selection widget."""
    prompt = "Wybierz jedną z opcji:"
    print(prompt)
    show_widget()
    
def show_widget():
    """Display the option dropdown followed by its bound output area."""
    to_render = (my_choice_widget, handler_out)
    display(*to_render)
    
def my_choice_handler(widget_value):
    """Print which option is currently being worked with.

    Args:
        widget_value: The value to report; it is interpolated into the message.
    """
    message = f"pracuje z {widget_value}"
    print(message)
    
def button_1_callback(button):
    """Click handler that prints the "Tutorial" label.

    Args:
        button: The object passed by the click event; not used.
    """
    label = "Tutorial"
    print(label)
    
def button_2_callback(button):
    """Click handler that prints the "Wizualizacja" label.

    Args:
        button: The object passed by the click event; not used.
    """
    label = "Wizualizacja"
    print(label)
    
def button_3_callback(button):
    """Click handler that prints the "Statystyka" label.

    Args:
        button: The object passed by the click event; not used.
    """
    label = "Statystyka"
    print(label)
    
# Dropdown offering the three options, and an output area that feeds the
# dropdown's value to my_choice_handler as `widget_value`.
my_choice_widget = widgets.Dropdown(options=["Tutorial", "Wizualizacja", "Statystyka"])
handler_out = widgets.interactive_output(my_choice_handler, {"widget_value": my_choice_widget})

# Three buttons, created in order so the names line up with their captions.
button_1, button_2, button_3 = (
    widgets.Button(description=caption)
    for caption in ("Run 1", "Run 2", "Run 3")
)

# Attach each button to the callback carrying the same number.
for _button, _callback in (
    (button_1, button_1_callback),
    (button_2, button_2_callback),
    (button_3, button_3_callback),
):
    _button.on_click(_callback)

In [20]:
def artist_rank():
    """Ask how many artists to show and print them ranked by total snatches.

    Reads the module-level ``torrents`` frame, sums "Total Snatched" per
    "Artist" and prints the requested number of rows, highest total first.
    The prompt is repeated until the answer is a decimal number no greater
    than 50.
    """
    x = input("Wybierz ilu topowych artystów chcesz zobaczyć: ")
    # isdecimal() instead of isdigit(): isdigit() also accepts characters such
    # as "²" that int() cannot parse, which made the check itself raise
    # ValueError instead of re-prompting.
    while not x.isdecimal() or int(x) > 50:
        x = input("Podana wartość musi być liczbą mniejszą lub rowna 50.")
    x = int(x)

    snatched_per_artist = (
        torrents[["Artist", "Total Snatched"]]
        .groupby(by="Artist")
        .sum()
        .sort_values("Total Snatched", ascending=False)
    )
    print(snatched_per_artist.head(x))

In [21]:
def year_release_rank():
    """Ask for a year and print how many releases of each type it has.

    Filters the module-level ``torrents`` frame to rows whose "Group Year"
    equals the chosen year and prints the value counts of "Release Type".
    The prompt is repeated until the answer is a decimal number in 1979-2016.
    """
    y = input("Wybierz z którego roku podać dane: ")
    # isdecimal() instead of isdigit(): isdigit() also accepts characters such
    # as "²" that int() cannot parse, which made the check itself raise
    # ValueError instead of re-prompting.
    while not (y.isdecimal() and 1979 <= int(y) <= 2016):
        y = input("Tylko dane dla lat 1979-2016.")
    y = int(y)

    releases_in_year = torrents[torrents["Group Year"] == y]
    print(releases_in_year["Release Type"].value_counts())

In [22]:
def year_snatched_rank():
    """Ask for a year and print the sum of "Total Snatched" for that year.

    Filters the module-level ``torrents`` frame to rows whose "Group Year"
    equals the chosen year. The prompt is repeated until the answer is a
    decimal number in 1979-2016.
    """
    z = input("Wybierz z którego roku podać dane: ")
    # isdecimal() instead of isdigit(): isdigit() also accepts characters such
    # as "²" that int() cannot parse, which made the check itself raise
    # ValueError instead of re-prompting.
    while not (z.isdecimal() and 1979 <= int(z) <= 2016):
        z = input("Tylko dane dla lat 1979-2016.")
    z = int(z)

    rows_for_year = torrents[torrents["Group Year"] == z]
    snatched_total = rows_for_year["Total Snatched"].sum()
    print(f"W {z} roku pobrano łącznie {snatched_total} płyt.")

In [23]:
def year_tags_rank():
    """Ask for a year and print the 50 most frequent tags for that year.

    Joins the module-level ``tags`` frame to the "Group Year" of ``torrents``
    on "ID", keeps the rows for the chosen year and prints the value counts
    of "Tag", limited to the first 50 entries. The prompt is repeated until
    the answer is a decimal number in 1979-2016.
    """
    a = input("Wybierz z którego roku podać dane: ")
    # isdecimal() instead of isdigit(): isdigit() also accepts characters such
    # as "²" that int() cannot parse, which made the check itself raise
    # ValueError instead of re-prompting.
    while not (a.isdecimal() and 1979 <= int(a) <= 2016):
        a = input("Tylko dane dla lat 1979-2016.")
    a = int(a)

    # Left join: tags without a matching torrent get a NaN year and are
    # dropped by the equality filter below.
    tags_with_year = pd.merge(tags, torrents[["ID", "Group Year"]], on="ID", how="left")
    tags_in_year = tags_with_year[tags_with_year["Group Year"] == a]
    print(tags_in_year["Tag"].value_counts().head(50))

In [24]:
def year_toptitle_rank():
    """Ask for a year and a count, then print that year's most snatched titles.

    Filters the module-level ``torrents`` frame to the chosen "Group Year",
    sums "Total Snatched" per "Group Name" and prints the requested number of
    rows, highest total first. The year prompt is repeated until the answer is
    a decimal number in 1979-2016; the count prompt until it is a decimal
    number no greater than 50.
    """
    b = input("Wybierz z którego roku podać dane: ")
    # isdecimal() instead of isdigit(): isdigit() also accepts characters such
    # as "²" that int() cannot parse, which made the check itself raise
    # ValueError instead of re-prompting.
    while not (b.isdecimal() and 1979 <= int(b) <= 2016):
        b = input("Tylko dane dla lat 1979-2016.")
    b = int(b)

    c = input("wybierz ilu topowych tytułow chcesz zobaczyć w danym roku: ")
    while not c.isdecimal() or int(c) > 50:
        c = input("Podana wartość musi być liczbą mniejszą lub równa 50.")
    c = int(c)

    # Boolean row filter (as in the sibling *_rank functions) instead of
    # DataFrame.where: where() keeps every row and fills the non-matching ones
    # with NaN, which turned the integer totals into floats in the output.
    titles_in_year = torrents.loc[
        torrents["Group Year"] == b, ["Group Name", "Total Snatched"]
    ]
    top_titles = (
        titles_in_year
        .groupby(by="Group Name")
        .sum()
        .sort_values("Total Snatched", ascending=False)
        .head(c)
    )
    # The message previously said "artystów" although titles are ranked here.
    print(f"W {b} ranking top tytułów wygląda następująco: {top_titles} ")