In [99]:
import sqlite3
import pandas as pd

# create connection to db
connection = sqlite3.connect('meta_music.db')

# create cursor
cursor = connection.cursor()

# drop tables if they exists
cursor.execute('DROP TABLE IF EXISTS artists')
cursor.execute('DROP TABLE IF EXISTS albums')
cursor.execute('DROP TABLE IF EXISTS features')

# create sql tables for artists, albums, and track features
artist_table_schema ='''CREATE TABLE artists (
                        artist_id INTERGER PRIMARY KEY, 
                        artist TEXT NOT NULL);'''

album_table_schema = '''CREATE TABLE albums (
                            album_id INTERGER PRIMARY KEY,
                            artist TEXT NOT NULL,
                            album TEXT NOT NULL, 
                            date TEXT NOT NULL, 
                            week_num INTEGER, 
                            meta_score INTEGER, 
                            user_score INTEGER,
                            artist_id INTEGER,
                            FOREIGN KEY(artist_id) REFERENCES artist (artist_id)
                        );'''

features_table_schema = '''CREATE TABLE features(
                            feature_id INTEGER, 
                            danceability REAL, 
                            energy REAL,
                            key INTEGER, 
                            loudness REAL, 
                            mode INTEGER, 
                            speechiness REAL, 
                            acousticness REAL, 
                            instrumentalness REAL,
                            liveness REAL,
                            valence REAL, 
                            tempo REAL, 
                            duration_ms INTEGER,
                            time_signature INTEGER,
                            artist_id INTEGER,
                            album_id INTEGER,
                            FOREIGN KEY (artist_id) REFERENCES artists (artist_id),
                            FOREIGN KEY (album_id) REFERENCES albums (album_id)
                            );'''

# create all tables
cursor.execute(features_table_schema)

cursor.execute(artist_table_schema)

cursor.execute(album_table_schema)


#check for tables 
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()
print(tables)

[('features',), ('artists',), ('albums',)]


In [100]:
# read csv into DF
combined_meta_scrapes_df = pd.read_csv('combined_csv.csv')  
# drop duplicate rows for artists and reset index
cleaned_artists_df = combined_meta_scrapes_df.drop_duplicates(subset=['artist'], ignore_index=True)
# create series for artist table in meta_db
artists_seriese = cleaned_artists_df['artist']

In [105]:
combined_meta_scrapes_df[combined_meta_scrapes_df['album'].isnull()]

Unnamed: 0,artist,album,date,week_num,meta_score,user_score
641,Nisennenmondai,,"April 1, 2016",13,76,0


In [106]:
combined_meta_scrapes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9319 entries, 0 to 9318
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   artist      9319 non-null   object
 1   album       9318 non-null   object
 2   date        9319 non-null   object
 3   week_num    9319 non-null   int64 
 4   meta_score  9319 non-null   int64 
 5   user_score  9319 non-null   int64 
dtypes: int64(3), object(3)
memory usage: 437.0+ KB


In [102]:
artists_seriese.to_sql(name='artists', con=connection, schema=artist_table_schema, index=True, index_label='artist_id', if_exists='append')

In [103]:
combined_meta_scrapes_df.to_sql(name='albums', con=connection, schema=album_table_schema, index=True, index_label='album_id', if_exists='append')

IntegrityError: NOT NULL constraint failed: albums.album

In [94]:
select_query = """SELECT * FROM artists"""


In [95]:
cursor.execute(select_query)
records = cursor.fetchall()

In [96]:
for row in records:
    print(row)

(0, 'The Streets')
(1, 'Tom Waits')
(2, 'Queens of the Stone Age')
(3, 'Spoon')
(4, 'Wilco')
(5, 'The Roots')
(6, 'Iron & Wine')
(7, 'Super Furry Animals')
(8, '24 Hour Party People')
(9, 'Solomon Burke')
(10, 'Songs: Ohia')
(11, '...And You Will Know Us by the Trail of Dead')
(12, 'Sleater-Kinney')
(13, 'Doves')
(14, 'Boards of Canada')
(15, 'They Might Be Giants')
(16, 'Out Hud')
(17, 'The Hives')
(18, 'The Flaming Lips')
(19, 'The Polyphonic Spree')
(20, 'Pulp')
(21, 'Ron Sexsmith')
(22, 'N.E.R.D')
(23, 'Hayden')
(24, 'Sigur Rós')
(25, 'Neko Case')
(26, "Me'Shell Ndegéocello")
(27, 'Bruce Springsteen & The E Street Band')
(28, 'Norah Jones')
(29, 'Mekons')
(30, 'Cornelius')
(31, '16 Horsepower')
(32, 'Sonic Youth')
(33, 'Elbow')
(34, 'Missy Elliott')
(35, 'Lemon Jelly')
(36, 'Interpol')
(37, 'DJ Shadow')
(38, 'Nas')
(39, 'Röyksopp')
(40, 'Mr. Lif')
(41, 'The Herbaliser')
(42, 'Josh Rouse')
(43, 'Ugly Casanova')
(44, 'Rilo Kiley')
(45, 'Coldplay')
(46, 'Godspeed You! Black Emperor')


(3662, 'JJ Doom')
(3663, 'Nude Beach')
(3664, 'CFCF')
(3665, 'Mux Mool')
(3666, 'Elton John vs Pnau')
(3667, 'Violens')
(3668, 'Porcelain Raft')
(3669, 'Distal')
(3670, 'Buckshot')
(3671, "The dB's")
(3672, 'ZZ Top')
(3673, 'Fixers')
(3674, 'Fashawn')
(3675, 'Allah-Las')
(3676, 'Sophia Knapp')
(3677, 'Memory Tapes')
(3678, 'Talk Normal')
(3679, 'Corrosion of Conformity')
(3680, 'Soulsavers')
(3681, 'Rosie Flores')
(3682, 'Sarah Jaffe')
(3683, 'Miike Snow')
(3684, 'Van Halen')
(3685, 'C.U.B.A.')
(3686, 'Daniel Lopatin')
(3687, 'The Lumineers')
(3688, 'Mike Wexler')
(3689, 'Howlin Rain')
(3690, 'Hot Panda')
(3691, 'Jerry Douglas')
(3692, 'The Chris Robinson Brotherhood')
(3693, 'Tu Fawning')
(3694, 'Jodie Marie')
(3695, 'Orcas')
(3696, 'Tamaryn')
(3697, 'Plug 2')
(3698, 'Ali Farka Toure')
(3699, 'Clipse')
(3700, 'The DFA')
(3701, 'Tom Zé')
(3702, 'Subtle')
(3703, 'Serena-Maneesh')
(3704, 'Brian Eno + David Byrne')
(3705, 'Josef K')
(3706, 'Charalambides')
(3707, 'Vitalic')
(3708, 'Vince 

In [107]:
import csv


In [112]:
with open('combined_csv.csv') as csvfile:

    # CSV reader specifies delimiter and variable that holds contents
    csvreader = csv.reader(csvfile, delimiter=',')

    # Read the header row first (skip this step if there is now header)
    csv_header = next(csvreader)
    # Read each row of data after the header
    for row in csvreader:
        if row[0] == 'Nisennenmondai':
            print(row)

['Nisennenmondai', '', 'April 1, 2016', '13', '76', '0']


UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 6065: character maps to <undefined>