In [1]:
# Imports
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship


In [2]:
# Open (or create on first use) the local SQLite database file that backs this notebook
engine = sqlalchemy.create_engine("sqlite:///soundcloud.sqlite")


In [3]:
# Use `declarative_base` from SQLAlchemy to model the play-count tables as ORM classes.
# Every ORM class defined below subclasses this Base, so their tables are all
# registered on `Base.metadata` and can be created together.
Base = declarative_base()


In [4]:
# Define the ORM class for `allTimePlayCount`

class allTimePlayCount(Base):
    """ORM mapping for the `allTimePlayCount` table.

    Columns:
        Genre: text primary key, so each genre appears at most once.
        allTimePlays: integer play count stored for that genre.
    """

    __tablename__ = 'allTimePlayCount'
    # Extend the Table already registered on Base.metadata instead of raising
    # when this cell is executed more than once in the same kernel.
    __table_args__ = {'extend_existing': True}

    Genre = Column(Text, primary_key=True)
    allTimePlays = Column(Integer)

    def __repr__(self):
        # Label each value with its real column name (previously "id"/"name").
        return f"Genre={self.Genre}, allTimePlays={self.allTimePlays}"


In [5]:
# Define the ORM class for `weeklyPlayCount`
class weeklyPlayCount(Base):
    """ORM mapping for the `weeklyPlayCount` table.

    Columns:
        Genre: text primary key, so each genre appears at most once.
        weeklyPlays: integer play count stored for that genre.
    """

    __tablename__ = 'weeklyPlayCount'
    # Extend the Table already registered on Base.metadata instead of raising
    # when this cell is executed more than once in the same kernel.
    __table_args__ = {'extend_existing': True}

    Genre = Column(Text, primary_key=True)
    weeklyPlays = Column(Integer)

    def __repr__(self):
        # Label each value with its real column name (previously "id"/"name").
        return f"Genre={self.Genre}, weeklyPlays={self.weeklyPlays}"


In [6]:
# Emit CREATE TABLE for every model registered on Base (tables that already exist are skipped)
Base.metadata.create_all(bind=engine)


In [7]:
# Verify that the table names exist in the database.
# `Engine.table_names()` is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of names.
sqlalchemy.inspect(engine).get_table_names()


['allTimePlayCount', 'weeklyPlayCount']

In [8]:
# Use Pandas to Bulk insert each CSV file into their appropriate table

def populate_table(engine, table, csvfile):
    """Replace the contents of `table` with the rows of a CSV file.

    Args:
        engine: SQLAlchemy engine used to open the connection.
        table: SQLAlchemy `Table` to refill (e.g. `SomeModel.__table__`).
        csvfile: Path or file-like object accepted by `pandas.read_csv`.
            The CSV column names become the insert parameter keys, so they
            are expected to match the table's column names.

    All existing rows are deleted before the new rows are inserted. Both
    statements run inside a single transaction: it is committed when the
    block finishes, rolled back if either statement raises, and the
    connection is released in both cases.
    """
    # Orient='records' turns the frame into a list of {column: value} dicts,
    # which is the shape a bulk (executemany) insert expects.
    # http://pandas-docs.github.io/pandas-docs-travis/io.html#orient-options
    records = pd.read_csv(csvfile).to_dict(orient='records')

    with engine.begin() as conn:
        # Clear the table first so re-running the cell does not hit
        # primary-key conflicts with previously loaded rows.
        conn.execute(table.delete())

        # A header-only CSV yields no records; skip the insert rather than
        # executing an INSERT with an empty parameter list.
        if records:
            conn.execute(table.insert(), records)
    
# Load each CSV export into its matching table: weekly counts first, then all-time counts
populate_table(
    engine=engine,
    table=weeklyPlayCount.__table__,
    csvfile='outputs/allWeeklyPlays.csv',
)
populate_table(
    engine=engine,
    table=allTimePlayCount.__table__,
    csvfile='outputs/allTimePlays.csv',
)


In [9]:
# Use a basic query to validate that the data was inserted correctly for table `allTimePlayCount`.
# `Engine.execute()` with a raw SQL string is deprecated in SQLAlchemy 1.4 and removed in 2.0,
# so run the statement through an explicit connection (closed by the `with` block) and `text()`.
with engine.connect() as conn:
    all_time_rows = conn.execute(sqlalchemy.text("SELECT * FROM allTimePlayCount")).fetchall()
all_time_rows
#NOTE: NEED TO RENAME THIS TABLE FIELD

[('Hip-Hop', 1341400025),
 ('R&B', 1078842494),
 ('Dance Hall', 885886121),
 ('Electronic Dance Music (EDM)', 885886121),
 ('Electronic', 717754470),
 ('House', 626515721),
 ('Latin', 547112114),
 ('Pop', 454106193),
 ('Dubstep', 335729509),
 ('Reggaeton', 275786909),
 ('Deep House', 255038873),
 ('Rock', 189451042),
 ('Alternative Rock', 133344855),
 ('World Music', 123661893),
 ('Country', 121147282),
 ('Ambient', 116873369),
 ('Reggae', 109432998),
 ('SoundTrack', 90188235),
 ('Classical', 85909366),
 ('Folk', 77292122),
 ('Indie', 75959255),
 ('Metal', 74440815),
 ('Piano', 56291090),
 ('Drum Bass', 47488330),
 ('Jazz', 44354095),
 ('Techno', 25266998),
 ('Disco', 15870036)]

In [10]:
# Use a basic query to validate that the data was inserted correctly for table `weeklyPlayCount`.
# `Engine.execute()` with a raw SQL string is deprecated in SQLAlchemy 1.4 and removed in 2.0,
# so run the statement through an explicit connection (closed by the `with` block) and `text()`.
with engine.connect() as conn:
    weekly_rows = conn.execute(sqlalchemy.text("SELECT * FROM weeklyPlayCount")).fetchall()
weekly_rows


[('Hip-Hop', 43903699),
 ('R&B', 9492331),
 ('Pop', 3758825),
 ('Latin', 1861022),
 ('Country', 1732593),
 ('Electronic', 1522777),
 ('Dance Hall', 1320940),
 ('Electronic Dance Music (EDM)', 1320940),
 ('Reggaeton', 1299906),
 ('World Music', 1226076),
 ('Rock', 762932),
 ('Dubstep', 642044),
 ('House', 605811),
 ('Alternative Rock', 380492),
 ('Folk', 322964),
 ('SoundTrack', 277843),
 ('Ambient', 267882),
 ('Classical', 247915),
 ('Indie', 195854),
 ('Deep House', 183181),
 ('Reggae', 146809),
 ('Metal', 146375),
 ('Drum Bass', 96773),
 ('Jazz', 85321),
 ('Techno', 64935),
 ('Piano', 63082),
 ('Disco', 23889)]