# BabyPandas Class
### We will be creating a baby version of the pandas class called BabyPandas.  This class will allow us to use Spotify data to help us to answer the question:
- Which song had the highest number of plays in one day?

BabyPandas -> **Class**  
Load -> **Method**  
Data -> **Attribute**  
Columns -> **Attribute**

Preview  
View the first five rows -> **Method**  
View number of rows/cols in our data -> **Method**  
View the data types for each column -> **Method**

Manipulate  
Add new columns -> **Method**  
Apply values to columns -> **Method**  
Subset our data -> **Method**  
Change the data type -> **Method**

Calculations  
Minimum -> **Method**  
Maximum -> **Method**  
Mean -> **Method**  
Standard deviation -> **Method**  


In [1]:
import statistics

class BabyPandas():
    
    def __init__(self,filename):
        self.filename = filename
        
    def read_data(self):
        file = open(self.filename,"r")
        filestring = file.read()
        stringlist = filestring.split("\n")
        data = []
        for row in stringlist:
            data.append(row.split(","))
        ncol = len(data[0])
        final_data = []
        for row in data:
            if len(row)==ncol:
                final_data.append(row)
        self.columns = final_data[0]
        self.data = final_data
        
    def head(self):
        return self.data[0:5]
    
    def info(self):
        types = []
        for col in self.data[1]:
            types.append(type(col))
        return types
    
    def shape(self):
        nrow = 0
        for row in self.data[1:]:
            nrow += 1
        ncol = len(self.data[1])
        return (nrow, ncol)

    def new_column(self,colname):
        for pos,d in enumerate(self.data):
            if pos==0:
                d.append(colname)
            else:
                d.append("N/A")
    
    def apply(self,index,values,row):
        if row==True:
            self.data[index]=values
        else:
            i=0
            for row in self.data:
                row[index] = values[i]
                i += 1
    
    def subset(self,col_index,rowval):
        # keep rows where column has rowval
        new_data = []
        for row in self.data:
            if row[col_index]==rowval:
                new_data.append(row)
        self.data = new_data
    
    def change_type(self,index,new_type):
        for row in self.data:
            row[index] = new_type(row[index])
            
    def summary_stats(self,col_index):
        col = []
        for row in self.data:
            col.append(row[col_index])
        mean = statistics.mean(col)
        median = statistics.median(col)
        try:
            mode = statistics.mode(col)
        except:
            mode = "undefined, likely all values different"
        stdev = statistics.stdev(col)
        string = "The {} is {}.\n"
        print(string.format("mean",str(mean)) + string.format("median",str(median)) + string.format("mode",str(mode)) + string.format("stdev",str(stdev)))
        
    def maximum(self,col_index):
        col = []
        for row in self.data:
            col.append(row[col_index])
        return max(col)
    
    def minimum(self,col_index):
        col = []
        for row in self.data:
            col.append(row[col_index])
        return min(col)

## Test out the functionality

In [2]:
music = BabyPandas("music_data.csv")

In [3]:
music.filename

'music_data.csv'

In [4]:
music.read_data()

In [5]:
music.columns
# looks like the columns are weird...oh well, 
# work around this for now

['', 'Track Name', 'Artist', 'Streams', 'Date', 'Region']

In [6]:
music.data

[['', 'Track Name', 'Artist', 'Streams', 'Date', 'Region'],
 ['0', 'Reggaetón Lento (Bailemos)', 'CNCO', '19272', '2017-01-01', 'ec'],
 ['1', 'Chantaje', 'Shakira', '19270', '2017-01-01', 'ec'],
 ['2',
  'Otra Vez (feat. J Balvin)',
  'Zion & Lennox',
  '15761',
  '2017-01-01',
  'ec'],
 ['3', "Vente Pa' Ca", 'Ricky Martin', '14954', '2017-01-01', 'ec'],
 ['4', 'Safari', 'J Balvin', '14269', '2017-01-01', 'ec'],
 ['5', 'La Bicicleta', 'Carlos Vives', '12843', '2017-01-01', 'ec'],
 ['6', 'Ay Mi Dios', 'IAmChino', '10986', '2017-01-01', 'ec'],
 ['7', 'Andas En Mi Cabeza', 'Chino & Nacho', '10653', '2017-01-01', 'ec'],
 ['8', 'Traicionera', 'Sebastian Yatra', '9807', '2017-01-01', 'ec'],
 ['9', 'Shaky Shaky', 'Daddy Yankee', '9612', '2017-01-01', 'ec'],
 ['10', 'Vacaciones', 'Wisin', '9611', '2017-01-01', 'ec'],
 ['11', 'Dile Que Tu Me Quieres', 'Ozuna', '8982', '2017-01-01', 'ec'],
 ['12', 'Let Me Love You', 'DJ Snake', '8834', '2017-01-01', 'ec'],
 ['13', 'DUELE EL CORAZON', 'Enrique Ig

In [7]:
music.head()

[['', 'Track Name', 'Artist', 'Streams', 'Date', 'Region'],
 ['0', 'Reggaetón Lento (Bailemos)', 'CNCO', '19272', '2017-01-01', 'ec'],
 ['1', 'Chantaje', 'Shakira', '19270', '2017-01-01', 'ec'],
 ['2',
  'Otra Vez (feat. J Balvin)',
  'Zion & Lennox',
  '15761',
  '2017-01-01',
  'ec'],
 ['3', "Vente Pa' Ca", 'Ricky Martin', '14954', '2017-01-01', 'ec']]

In [8]:
music.data[-1]

['37099', 'Alguien Robo', 'Sebastian Yatra', '2950', '2018-01-09', 'ec']

In [9]:
music.info()

[str, str, str, str, str, str]

In [10]:
music.shape()

(36121, 6)

In [11]:
music.new_column("test_column")

In [12]:
music.head()

[['', 'Track Name', 'Artist', 'Streams', 'Date', 'Region', 'test_column'],
 ['0',
  'Reggaetón Lento (Bailemos)',
  'CNCO',
  '19272',
  '2017-01-01',
  'ec',
  'N/A'],
 ['1', 'Chantaje', 'Shakira', '19270', '2017-01-01', 'ec', 'N/A'],
 ['2',
  'Otra Vez (feat. J Balvin)',
  'Zion & Lennox',
  '15761',
  '2017-01-01',
  'ec',
  'N/A'],
 ['3', "Vente Pa' Ca", 'Ricky Martin', '14954', '2017-01-01', 'ec', 'N/A']]

In [13]:
music.shape()

(36121, 7)

In [14]:
from random import random

(nrow,ncol) = music.shape()
new_vals = []
for i in range(nrow+1):
    if i==0:
        new_vals.append("new_vals")
    else:
        new_vals.append(str(random()))

print(len(new_vals))        
new_vals[0:5] 

36122


['new_vals',
 '0.526189235720525',
 '0.16142277019008977',
 '0.5859931394940241',
 '0.5976895515830822']

In [15]:
music.apply(6,new_vals,False)
music.head()

[['', 'Track Name', 'Artist', 'Streams', 'Date', 'Region', 'new_vals'],
 ['0',
  'Reggaetón Lento (Bailemos)',
  'CNCO',
  '19272',
  '2017-01-01',
  'ec',
  '0.526189235720525'],
 ['1',
  'Chantaje',
  'Shakira',
  '19270',
  '2017-01-01',
  'ec',
  '0.16142277019008977'],
 ['2',
  'Otra Vez (feat. J Balvin)',
  'Zion & Lennox',
  '15761',
  '2017-01-01',
  'ec',
  '0.5859931394940241'],
 ['3',
  "Vente Pa' Ca",
  'Ricky Martin',
  '14954',
  '2017-01-01',
  'ec',
  '0.5976895515830822']]

In [16]:
music.data[-1]

['37099',
 'Alguien Robo',
 'Sebastian Yatra',
 '2950',
 '2018-01-09',
 'ec',
 '0.4140713543305111']

In [17]:
def apply(data,index,values,row):
    if row==True:
        data[index]=values
    else:
        i=0
        for row in data:
            row[index] = values[i]
            i += 1
    return data

In [18]:
test = [["1","2","3"],["4","5","6"]]
test2 = apply(test,1,["10","20"],True)
print(test2)

[['1', '2', '3'], ['10', '20']]


In [19]:
music.subset(2,"Shakira")

In [20]:
music.head()

[['1',
  'Chantaje',
  'Shakira',
  '19270',
  '2017-01-01',
  'ec',
  '0.16142277019008977'],
 ['101',
  'Chantaje',
  'Shakira',
  '15594',
  '2017-01-02',
  'ec',
  '0.15584184581085303'],
 ['201',
  'Chantaje',
  'Shakira',
  '16614',
  '2017-01-03',
  'ec',
  '0.4399478643341229'],
 ['301',
  'Chantaje',
  'Shakira',
  '16582',
  '2017-01-04',
  'ec',
  '0.16425612560089675'],
 ['401',
  'Chantaje',
  'Shakira',
  '16254',
  '2017-01-05',
  'ec',
  '0.6053222968888792']]

In [21]:
music.change_type(6,lambda x: float(x))

In [22]:
music.head()

[['1',
  'Chantaje',
  'Shakira',
  '19270',
  '2017-01-01',
  'ec',
  0.16142277019008977],
 ['101',
  'Chantaje',
  'Shakira',
  '15594',
  '2017-01-02',
  'ec',
  0.15584184581085303],
 ['201',
  'Chantaje',
  'Shakira',
  '16614',
  '2017-01-03',
  'ec',
  0.4399478643341229],
 ['301',
  'Chantaje',
  'Shakira',
  '16582',
  '2017-01-04',
  'ec',
  0.16425612560089675],
 ['401',
  'Chantaje',
  'Shakira',
  '16254',
  '2017-01-05',
  'ec',
  0.6053222968888792]]

In [23]:
music.summary_stats(6)

The mean is 0.5083549934064555.
The median is 0.5163719177497641.
The mode is undefined, likely all values different.
The stdev is 0.2864735362460558.



In [24]:
music.maximum(6)

0.9969422654876078

In [25]:
music.minimum(6)

0.0027587279323803005

### Use BabyPandas to answer the following question:
- Which song was the top song of 2017?

In [26]:
# read in the data
music = BabyPandas("music_data.csv")
music.read_data()
music.head()

[['', 'Track Name', 'Artist', 'Streams', 'Date', 'Region'],
 ['0', 'Reggaetón Lento (Bailemos)', 'CNCO', '19272', '2017-01-01', 'ec'],
 ['1', 'Chantaje', 'Shakira', '19270', '2017-01-01', 'ec'],
 ['2',
  'Otra Vez (feat. J Balvin)',
  'Zion & Lennox',
  '15761',
  '2017-01-01',
  'ec'],
 ['3', "Vente Pa' Ca", 'Ricky Martin', '14954', '2017-01-01', 'ec']]

In [30]:
# add a year column and subset to songs in 2017
from datetime import datetime
music.new_column("year")
music.change_type(4,lambda x: datetime.strptime(x,"%Y-%m-%d"))

ValueError: time data 'Date' does not match format '%Y-%d-%m'

In [None]:
# create a dictionary with track name and
# stream counts for 2017

In [29]:
datetime.strptime('2017-01-01',"%Y-%m-%d")

datetime.datetime(2017, 1, 1, 0, 0)

In [31]:
years=music.subset(4,rowval)

TypeError: 'BabyPandas' object does not support indexing

In [1]:
file = open("music_data.csv","r")
filestring = file.read()
stringlist = filestring.split("\n")
data = []
for row in stringlist:
    data.append(row.split(","))
data=data[:-1]

In [2]:
dates=[]
for row in data[1:]:
    dates.append(row[4])
dates

['2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 'Farruko',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01',
 '2017-01-01'

In [10]:
lengths = []
for date in dates:
    lengths.append(len(date))
lengths

[10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 7,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 4,
 10,
 10,
 10,
 10,
 10,
 16,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 7,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 4,
 10,
 10,
 16,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,
 10,

In [4]:
max(lengths)

23

In [5]:
min(lengths)

4

In [15]:
weird_dates=[]
for date in dates:
    if len(date) != 10:
        weird_dates.append(date)
weird_dates

['Farruko',
 '2218',
 ' Zion & Lennox]"',
 'Farruko',
 '2497',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 '2574',
 'Farruko',
 ' Zion & Lennox]"',
 '2497',
 'Farruko',
 ' Zion & Lennox]"',
 '2650',
 'Farruko',
 ' Zion & Lennox]"',
 '2690',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 ' Messiah & Baby Rasta]"',
 'Farruko',
 ' Zion & Lennox]"',
 ' Messiah & Baby Rasta]"',
 ' Zion & Lennox]"',
 'Farruko',
 ' Messiah & Baby Rasta]"',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Zion & Lennox]"',
 'Farruko',
 ' Messiah & Baby Rasta]"',
 ' Zion &

In [14]:
len(weird_dates[1])

10