# Creating a SimpleFrame Class

This project creates a SimpleFrame class to answer the following questions:

Which song had the highest number of plays in one day?
Which song had the lowest number of plays in one day?

In [1]:
import csv
import statistics as stat

class SimpleFrame():
    def __init__(self, filename):
        self.filename = filename
        
    def read_data(self):
        with open(self.filename, 'r', encoding='utf-8') as f:
            self.data = list(csv.reader(f))
        self.columns = self.data[0]
        
    def head(self):
        return self.data[:5]
    
    def shape(self):
        return([len(self.data), len(self.columns)])
    
    def new_column(self, new_name):
        self.data[0].append(new_name)
        for i in range(1, len(self.data)):
            self.data[i].append(0)
            
    def apply(self,to_replace, new_value, x): #x = 0 for row, = 1 for column
        if x == 0:
            for i in range(1, len(self.data)):
                if self.data[i] == to_replace:
                    self.data[i] = [new_value for j in self.data[i]]
                else:
                    print('No row matches the argument')
        elif x == 1:
            for pos, i in enumerate(self.data[0]):
                if i == to_replace:
                    replace_index = pos
            for data in self.data[1:]:
                data[replace_index] = new_value
                
    def subset(self, column_name, row_value):
        for pos, col in enumerate(self.data[0]):
            if col == column_name:
                column_index = pos
        
        print(column_index)
        subset_data = []
        for data in self.data[1:]:
            if row_value in data:
                subset_data.append(data[column_index])
        return subset_data
        
    def summary_stats(self, column_name):
        for pos, col in enumerate(self.data[0]):
            if col == column_name:
                column_index = pos
                
        num_data = [data[column_index] for data in self.data[1:]]
        data_mean = stat.mean(num_data)
        data_median = stat.median(num_data)
        data_mode = stat.mode(num_data)
        data_std = stat.stdev(num_data)
        
        stat_name = ["mean", "median", "mode", "standard deviation"]
        stat_num = [data_mean, data_median, data_mode, data_std]
        
        for i in range(4):
            print("The {name} is {value}".format(name=stat_name[i], value=stat_num[i]))
            
    def maximum(self, column_name):
        for pos, col in enumerate(self.data[0]):
            if col == column_name:
                column_index = pos
                
        sub_data = []
        
        for row in self.data[1:]:
            sub_data.append([row[1], row[2], int(row[column_index])])
            
        return max(sub_data, key=lambda x:x[2])
        #return sub_data
    
    def minimum(self, column_name):
        for pos, col in enumerate(self.data[0]):
            if col == column_name:
                column_index = pos
                
        sub_data = []
        
        for row in self.data[1:]:
            sub_data.append([row[1], row[2], int(row[column_index])])
            
        return min(sub_data, key=lambda x:x[2])

In [19]:
# Verify if the SimpleClass works

music = SimpleFrame('music_data.csv')
music.read_data()
music.new_column('Month')
print(music.columns)
print()
print(music.shape())
print()
print(music.head())

['', 'Track Name', 'Artist', 'Streams', 'Date', 'Region', 'Month']

[37101, 7]

[['', 'Track Name', 'Artist', 'Streams', 'Date', 'Region', 'Month'], ['0', 'Reggaetón Lento (Bailemos)', 'CNCO', '19272', '2017-01-01', 'ec', 0], ['1', 'Chantaje', 'Shakira', '19270', '2017-01-01', 'ec', 0], ['2', 'Otra Vez (feat. J Balvin)', 'Zion & Lennox', '15761', '2017-01-01', 'ec', 0], ['3', "Vente Pa' Ca", 'Ricky Martin', '14954', '2017-01-01', 'ec', 0]]


In [18]:
# Song with highest one day stream
highest_daily_stream = music.maximum('Streams')
print("The song \''{song}\'' has the highest one-day stream at {number}.\n".format(song = highest_daily_stream[0], number=highest_daily_stream[2]))

# Song with lowest one day stream
lowest_daily_stream = music.minimum('Streams')
print("The song \''{song}\'' has the lowest one-day stream at {number}.\n".format(song = lowest_daily_stream[0], number=lowest_daily_stream[2]))

# Which song was the top song of 2017?
song_dict = {}

for row in music.data[1:]:
    if row[1] not in song_dict:
        song_dict[row[1]] = 0
    song_dict[row[1]] += int(row[3])

sort_song = sorted(song_dict.items(), key=lambda x:x[1], reverse=True)

print("Top 5 songs of 2017 is:")
for i in range(5):
    print("{} - {:,} streams".format(sort_song[i][0],sort_song[i][1]))
print()

# Who was the most dominant artist of 2017?
artist_dict = {}
for row in music.data[1:]:
    if row[2] not in artist_dict:
        artist_dict[row[2]] = 0
    artist_dict[row[2]] += int(row[3])

sort_artist = sorted(artist_dict.items(), key=lambda x:x[1], reverse=True)
print("Top 5 artists of 2017 is:")
for i in range(5):
    print("{} - {:,} streams".format(sort_artist[i][0],sort_artist[i][1]))
print()

The song ''Despacito (Featuring Daddy Yankee)'' has the highest one-day stream at 64238.

The song ''Por Fin Te Encontré'' has the lowest one-day stream at 1993.

Top 5 songs of 2017 is:
Me Rehúso - 6,364,823 streams
Despacito (Featuring Daddy Yankee) - 5,634,758 streams
Shape of You - 5,256,420 streams
Felices los 4 - 4,563,383 streams
Una Lady Como Tú - 3,928,923 streams

Top 5 artists of 2017 is:
J Balvin - 12,784,607 streams
Luis Fonsi - 8,670,717 streams
Ed Sheeran - 8,565,005 streams
Ozuna - 8,052,866 streams
Maluma - 8,023,769 streams



In [27]:
# Who was the dominant artist for each month of 2017?

from datetime import datetime

month_list = []
for row in music.data[1:]:
    row[6] = datetime.strptime(row[4], "%Y-%m-%d").month
    if row[6] not in month_list:
        month_list.append(row[6])

month_dict = {}

for month in month_list:
    month_artist = {}
    for row in music.data[1:]:
        if row[6] == month:
            if row[2] not in month_artist:
                month_artist[row[2]] = int(row[3])
            month_artist[row[2]] += int(row[3])
    sort_artist = sorted(month_artist.items(), key=lambda x:x[1], reverse=True)            
    month_dict[month] = sort_artist[0]
    
month_dict


{1: ('Luis Fonsi', 1126100),
 2: ('Luis Fonsi', 1252748),
 3: ('Ed Sheeran', 1490895),
 4: ('Ed Sheeran', 974170),
 5: ('Maluma', 1130119),
 6: ('Maluma', 938938),
 7: ('J Balvin', 1983023),
 8: ('J Balvin', 1839051),
 9: ('J Balvin', 1582042),
 10: ('J Balvin', 1458143),
 11: ('J Balvin', 1116311),
 12: ('Maluma', 1278942)}