In [1]:
# Importing pandas
import pandas as pd

# Load the raw CSV exports into dataframes.
tags = pd.read_csv("tags.csv")
torrents = pd.read_csv("torrents.csv")

# Give the columns human-readable names. Reassigning (instead of inplace=True)
# keeps each step an explicit "new frame from old frame" transformation.
tags = tags.rename(columns={"index": "Index", "id": "ID", "tag": "Tag"})
torrents = torrents.rename(columns={
    "groupName": "Group Name",
    "totalSnatched": "Total Snatched",
    "artist": "Artist",
    "groupYear": "Group Year",
    "releaseType": "Release Type",
    "groupId": "Group ID",
    "id": "ID",
})

# Convert the text columns to title case.
tags["Tag"] = tags["Tag"].str.title()
torrents["Group Name"] = torrents["Group Name"].str.title()
torrents["Artist"] = torrents["Artist"].str.title()
torrents["Release Type"] = torrents["Release Type"].str.title()


def _replace_literals(series, replacements):
    """Apply literal (non-regex) substring replacements to a string Series.

    Parameters
    ----------
    series : pandas.Series
        Series of strings to clean.
    replacements : dict
        Maps each substring to its replacement; applied in insertion order.

    Returns
    -------
    pandas.Series
        A new Series with every replacement applied.
    """
    for old, new in replacements.items():
        # regex=False is explicit because the default of `regex` differs
        # between pandas versions; these patterns must always be literal.
        series = series.str.replace(old, new, regex=False)
    return series


# HTML entities left in the scraped text. The keys are spelled the way the
# entities look AFTER .str.title() has run (e.g. "&amp;" has become "&Amp;").
# Order matters: "&Amp;" is decoded before "&Quot;"/"&Aacute;", exactly as in
# the original sequence of replacements.
HTML_ENTITY_FIXES = {
    "&#39;": "'",
    "&Amp;": "&",
    "&Quot;": "\"",
    "&Aacute;": "á",
}

# Replace wrong characters.
# Tags use "." as a word separator; it must be matched literally, because as a
# regular expression "." would match every character.
tags["Tag"] = _replace_literals(tags["Tag"], {".": " "})
# Title-casing turns a trailing "s" after a digit into "S" (e.g. "1990S");
# this anchored regex lower-cases a final "S" again.
tags["Tag"] = tags["Tag"].str.replace("S$", "s", regex=True)
torrents["Group Name"] = _replace_literals(torrents["Group Name"], HTML_ENTITY_FIXES)
torrents["Artist"] = _replace_literals(torrents["Artist"], HTML_ENTITY_FIXES)

# Checking for NaNs in the tags dataframe.
# NOTE: the three helper names below are reused for torrents right after, so
# the tags result is only available until the next statements run.
is_NaN = tags.isnull()
row_has_NaN = is_NaN.any(axis=1)
rows_with_NaN = tags[row_has_NaN]

# Checking for NaNs in the torrents dataframe.
is_NaN = torrents.isnull()
row_has_NaN = is_NaN.any(axis=1)
rows_with_NaN = torrents[row_has_NaN]

# Remove rows containing NaNs and reset the indexes.
tags = tags.dropna().reset_index(drop=True)
torrents = torrents.dropna().reset_index(drop=True)

# Check for duplicated torrent IDs (last expression, so the cell displays it).
torrents[torrents.duplicated(subset="ID")]

Unnamed: 0,Group Name,Total Snatched,Artist,Group Year,Release Type,Group ID,ID


In [2]:
# Global variables and imports
import matplotlib.pyplot as plt
import plotly as py
import plotly.graph_objs as go
import cufflinks
import ipywidgets as widgets

In [3]:
# Main function
def main():
    """Show the welcome banner and the command menu, then dispatch the answer.

    Prints two blocks of text, reads one line from standard input and passes
    it unchanged to ``choice_tree_main``.
    """
    banner = "Co słychać w Zatoce Piratów?\nAplikacja jest do Twojej dyspozycji!\nWprowadź właściwe polecenie:\n"
    menu = "O dla Opisu\nS dla Statystyk\nW dla Wizualizacji\n"
    for text in (banner, menu):
        print(text)
    choice_tree_main(input())

# Main choice tree function
def choice_tree_main(choice_main):
    """Dispatch the top-level menu command to the matching sub-menu.

    Parameters
    ----------
    choice_main : str
        The command typed by the user. Case-insensitive; surrounding
        whitespace is ignored. Valid commands are "O" (description),
        "S" (statistics) and "W" (visualisation).

    An invalid command triggers a re-prompt that repeats until a valid command
    is entered, and that command is then dispatched. (Previously the re-prompted
    answer was read and then discarded, so nothing happened.)
    """
    command = choice_main.strip().upper()
    while command not in ("O", "S", "W"):
        command = input("\nSprawdź poprawność swojego zapytania.").strip().upper()

    if command == "O":
        print("\nDane opisowe:\n")
        choice_value_counts = input()
        choice_tree_value_counts(choice_value_counts)
    elif command == "S":
        print("\nDane dla statystyki:\n")
        choice_stat_column = input()
        choice_tree_stat(choice_stat_column)
    else:  # command == "W"
        print("\nDane dla wizualizacji:\n")
        # Two column names are read: the first feeds the histogram and the
        # scatter x-axis, the second the scatter y-axis.
        choice_vis_column_1 = input()
        choice_vis_column_2 = input()
        choice_tree_vis(choice_vis_column_1, choice_vis_column_2)

# Value counts choice tree function
def choice_tree_value_counts(choice_value_counts):
    """Print the frequency table of one ``torrents`` column and its size.

    Parameters
    ----------
    choice_value_counts : str
        Column name as typed by the user; it must match a column of the
        module-level ``torrents`` dataframe exactly.

    Prints the ``value_counts()`` table followed by the number of rows in that
    table (the number of distinct values). If the column does not exist, the
    app's "check your query" message is printed instead of silently doing
    nothing.
    """
    if choice_value_counts not in torrents.columns:
        print("\nSprawdź poprawność swojego zapytania.")
        return

    # Compute the table once and reuse it for both printed results.
    counts = torrents[choice_value_counts].value_counts()
    print(counts)
    print(len(counts))

# Statistic choice tree function
def choice_tree_stat(choice_stat_column):
    """Print summary statistics for one ``torrents`` column.

    Parameters
    ----------
    choice_stat_column : str
        Column name as typed by the user; it must match a column of the
        module-level ``torrents`` dataframe exactly.

    Prints, in order: ``describe()`` of the column, its ``value_counts()``
    table, and the number of rows in that table (the number of distinct
    values). If the column does not exist, the app's "check your query"
    message is printed instead of silently doing nothing.
    """
    if choice_stat_column not in torrents.columns:
        print("\nSprawdź poprawność swojego zapytania.")
        return

    column = torrents[choice_stat_column]
    # Compute the frequency table once and reuse it for both printed results.
    counts = column.value_counts()
    print(column.describe())
    print(counts)
    print(len(counts))

# Visual choice tree function
def choice_tree_vis(choice_vis_column_1, choice_vis_column_2):
    """Draw a histogram of one ``torrents`` column and a scatter plot of two.

    Parameters
    ----------
    choice_vis_column_1 : str
        Column shown in the histogram and used as the scatter x-axis.
    choice_vis_column_2 : str
        Column used as the scatter y-axis.

    Both names must match columns of the module-level ``torrents`` dataframe
    exactly; otherwise the app's "check your query" message is printed and
    nothing is drawn. The plots are drawn on explicit axes and shown with
    ``plt.show()`` (previously the scatter Axes object was passed to
    ``print``, which only emitted its repr).

    NOTE(review): the scatter plot presumably needs numeric columns — confirm
    how non-numeric selections should be handled.
    """
    known_columns = torrents.columns
    if choice_vis_column_1 not in known_columns or choice_vis_column_2 not in known_columns:
        print("\nSprawdź poprawność swojego zapytania.")
        return

    first_column = torrents[choice_vis_column_1]
    fig, (ax_hist, ax_scatter) = plt.subplots(1, 2, figsize=(12, 5))

    # One bin per distinct value, as before; max(..., 1) avoids requesting
    # zero bins when the column has no values.
    ax_hist.hist(first_column, bins=max(first_column.nunique(), 1))
    ax_hist.set_title(choice_vis_column_1)
    ax_hist.set_xlabel(choice_vis_column_1)

    torrents.plot.scatter(x=choice_vis_column_1, y=choice_vis_column_2, ax=ax_scatter)
    ax_scatter.set_title(f"{choice_vis_column_1} / {choice_vis_column_2}")

    fig.tight_layout()
    plt.show()