In [1]:
# 2021-05, Christoph Meier, https://github.com/chrisP-cpmr
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

import pandas as pd
import copy as copy

# Display options for DataFrame output in this notebook.
# The fully qualified "display.*" keys are used on purpose: abbreviated keys
# are resolved by pattern matching and raise an OptionError as soon as they
# match more than one option (in newer pandas releases "precision" also
# matches "styler.format.precision", "max_rows" also matches
# "styler.render.max_rows").
pd.set_option('display.precision', 3)      # decimal places shown for floats
pd.set_option('display.max_rows', 30)      # longer frames are rendered truncated
pd.set_option('display.max_colwidth', 30)  # longer cell text is cut off with "..."

In [2]:
# Path of the source CSV with the weekly chart rows (relative to this notebook).
filename = "../Meier_Christoph_Data/Hitparade.ch_2010_src.csv"

# Parse the file into a DataFrame; the file is decoded as UTF-8.
weeklyCharts = pd.read_csv(filepath_or_buffer=filename, encoding='UTF-8')

## Replace "-" with "." for the Week entries

In [3]:
# Preview the loaded chart data (the rendering shows the first and last rows only).
weeklyCharts

Unnamed: 0,VW,ArtistAndTitle,WeeksAndPeak,Rank,Week
0,VW 1,Melanie Fiona</b><br/>Mond...,W 10 | P 1,1,03-01-2010
1,VW 2,Rihanna</b><br/>Russian Ro...,W 6 | P 2,2,03-01-2010
2,VW 3,The Black Eyed Peas</b><br...,W 10 | P 3,3,03-01-2010
3,VW 10,Jay-Z + Alicia Keys</b><br...,W 14 | P 4,4,03-01-2010
4,VW 7,OneRepublic</b><br/>All Th...,W 9 | P 2,5,03-01-2010
...,...,...,...,...,...
50145,VW 90,Shawn Mendes</b><br/>Wonde...,W 13 | P 19,96,03-01-2021
50146,VW 98,Jonas Brothers</b><br/>Lik...,W 4 | P 71,97,03-01-2021
50147,VW 93,Justin Bieber feat. Chance...,W 15 | P 10,98,03-01-2021
50148,re,Maroon 5</b><br/>Memories</a>,W 53 | P 3,99,03-01-2021


In [4]:
# Switch the date separator in " Week" from "-" to "."
# (e.g. 03-01-2010 becomes 03.01.2010). Both arguments are plain characters,
# so a literal replacement yields exactly the same strings as a regex one.
weeklyCharts[" Week"] = weeklyCharts[" Week"].str.replace('-', '.', regex=False)

# Show the frame again to verify the rewritten " Week" column.
weeklyCharts

Unnamed: 0,VW,ArtistAndTitle,WeeksAndPeak,Rank,Week
0,VW 1,Melanie Fiona</b><br/>Mond...,W 10 | P 1,1,03.01.2010
1,VW 2,Rihanna</b><br/>Russian Ro...,W 6 | P 2,2,03.01.2010
2,VW 3,The Black Eyed Peas</b><br...,W 10 | P 3,3,03.01.2010
3,VW 10,Jay-Z + Alicia Keys</b><br...,W 14 | P 4,4,03.01.2010
4,VW 7,OneRepublic</b><br/>All Th...,W 9 | P 2,5,03.01.2010
...,...,...,...,...,...
50145,VW 90,Shawn Mendes</b><br/>Wonde...,W 13 | P 19,96,03.01.2021
50146,VW 98,Jonas Brothers</b><br/>Lik...,W 4 | P 71,97,03.01.2021
50147,VW 93,Justin Bieber feat. Chance...,W 15 | P 10,98,03.01.2021
50148,re,Maroon 5</b><br/>Memories</a>,W 53 | P 3,99,03.01.2021


# Check if data is clean 

## Check for duplicates

In [5]:
# List every row that repeats an earlier row in all columns
# (an empty result means the data contains no exact duplicates).
weeklyCharts.loc[weeklyCharts.duplicated()]

Unnamed: 0,VW,ArtistAndTitle,WeeksAndPeak,Rank,Week


## Check for typos / anomalies in column " Week"

In [6]:
# Count how many rows carry each " Week" label; mistyped or malformed labels
# would show up as additional entries with unusual counts.
weeklyCharts.loc[:, " Week"].value_counts()

14.02.2021    100
05.08.2018    100
01.10.2017    100
05.02.2017    100
06.09.2020    100
             ... 
04.04.2010     75
13.11.2011     75
13.09.2015     75
19.07.2015     75
08.09.2013     75
Name:  Week, Length: 589, dtype: int64

### DataFrame to save the changed rows

In [7]:
# Empty frame with the same columns and dtypes as the chart data; the rows
# that are injected or altered further down are collected in it.
# Taking a zero-row slice of the already-loaded frame avoids parsing the
# whole CSV a second time and avoids an in-place drop of every row.
impunitiesExcel = weeklyCharts.iloc[0:0].copy()

# Insert some duplicates

In [8]:
# Inject 100 exact duplicates: draw 100 existing rows and add them to the end
# of the frame (original index labels are kept, so the copies are full
# duplicates of their source rows).
# - pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
#   and removed in pandas 2.0.
# - random_state pins the draw so a fresh "Restart & Run All" produces the
#   same dirty data set every time.
weeklyCharts = pd.concat([weeklyCharts, weeklyCharts.sample(100, random_state=42)])

In [9]:
# List the rows that repeat an earlier row in all columns; after the
# injection step these are the added copies.
weeklyCharts.loc[weeklyCharts.duplicated()]

Unnamed: 0,VW,ArtistAndTitle,WeeksAndPeak,Rank,Week
32752,VW 3,Luis Fonsi &amp; Demi Lova...,W 7 | P 2,3,07.01.2018
15840,VW 21,Ellie Goulding</b><br/>How...,W 8 | P 16,16,09.02.2014
17239,VW 67,Disclosure feat. Eliza Doo...,W 4 | P 65,65,15.06.2014
27380,VW 28,Shakira feat. Maluma</b><b...,W 8 | P 28,31,25.12.2016
37012,VW 55,Aya Nakamura</b><br/>Djadj...,W 23 | P 43,63,28.10.2018
...,...,...,...,...,...
46951,VW 1,Master KG feat. Burna Boy ...,W 13 | P 1,2,27.09.2020
15103,VW 32,Bakermat</b><br/>One Day (...,W 11 | P 22,29,01.12.2013
677,VW 10,Iyaz</b><br/>Replay</a>,W 4 | P 3,3,07.03.2010
26634,re,Emeli Sandé</b><br/>Hurts</a>,W 3 | P 64,85,30.10.2016


In [10]:
# Record the duplicated rows in the change log frame.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
impunitiesExcel = pd.concat([impunitiesExcel, weeklyCharts[weeklyCharts.duplicated()]])

## Insert null values (placeholder "200" in " WeeksAndPeak")

In [11]:
# Draw 10 rows whose " WeeksAndPeak" entry gets overwritten in the next step.
# - random_state pins the draw so re-running the notebook corrupts the same rows.
# - .copy() makes the sample an independent frame, so changing a column in it
#   cannot affect weeklyCharts.
weeklyChartsAddNull = weeklyCharts.sample(10, random_state=42).copy()

In [12]:
# Overwrite " WeeksAndPeak" in every sampled row with the placeholder "200",
# which does not follow the "W <n> | P <n>" pattern of the regular entries.
weeklyChartsAddNull = weeklyChartsAddNull.assign(**{" WeeksAndPeak": "200"})

# Display the altered rows.
weeklyChartsAddNull

Unnamed: 0,VW,ArtistAndTitle,WeeksAndPeak,Rank,Week
34532,VW 85,Kygo feat. Justin Jesso</b...,200,83,06.05.2018
22876,VW 2,Justin Bieber</b><br/>Sorr...,200,2,29.11.2015
47387,VW 51,Joya Marleen</b><br/>Night...,200,38,25.10.2020
18603,VW 4,Calvin Harris feat. John N...,200,4,26.10.2014
4420,VW 66,Ben L'Oncle Soul</b><br/>S...,200,71,20.02.2011
15667,re,Ylvis</b><br/>The Fox</a>,200,68,19.01.2014
31384,VW 28,Rita Ora</b><br/>Your Song...,200,35,01.10.2017
19980,VW 35,Helene Fischer</b><br/>Ate...,200,31,01.03.2015
34263,VW 14,Calvin Harris &amp; Dua Li...,200,14,22.04.2018
14207,VW 38,Klangkarussell</b><br/>Son...,200,33,08.09.2013


In [13]:
# Add the altered rows to the chart data.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
weeklyCharts = pd.concat([weeklyCharts, weeklyChartsAddNull])

In [14]:
# Record the altered rows in the change log frame as well.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
impunitiesExcel = pd.concat([impunitiesExcel, weeklyChartsAddNull])

In [15]:
# Write the chart data including the injected rows to a CSV file; the
# DataFrame index is left out of the file.
weeklyCharts.to_csv(
    path_or_buf="../Meier_Christoph_Data/Hitparade.ch_2010_src_dirty.csv",
    index=False,
)

# Write the collected changed rows to an Excel workbook.
impunitiesExcel.to_excel(
    excel_writer="../Meier_Christoph_Data/Meier_changeRows.xlsx",
)