#### Mayo 2024 
[Canción más escuchada en Colombia en el 2016 🎼🎼](https://open.spotify.com/track/1zi7xx7UVEFkmKfv06H8x0?si=6b985fe4b3f54089)
# Creando una base de datos más pequeña 😅
### [Por: Carlos Eduardo Cortés Gomez](https://carloseduardo.omg.lol/)
-------------------------------------


In [None]:
### Biblioteca

from sqlalchemy import create_engine, Column, Integer, String, ForeignKey
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import PrimaryKeyConstraint
from sqlalchemy.orm import sessionmaker
from datetime import datetime

In [None]:
from concurrent.futures import ThreadPoolExecutor
from sqlalchemy.orm import sessionmaker

In [None]:
### Declarations
# Shared declarative base class: every model class that subclasses Base
# registers its table on Base.metadata, which is what create_all() is later
# called on to emit the tables.
Base = declarative_base()

# Define models
class SliceTime(Base):
    """ORM mapping for the ``slice_time`` table, keyed by ``slice``.

    ``Playlists.slice`` declares a foreign key that points at this table's
    ``slice`` column.
    """
    __tablename__ = 'slice_time'
    # Primary key; referenced by the foreign key on Playlists.slice.
    slice = Column(String, primary_key=True)
    # Stored as text; presumably when the slice was generated — TODO confirm format.
    generated_on = Column(String)
    # Stored as text; presumably a dataset/format version — TODO confirm.
    version = Column(String)

class Playlists(Base):
    """ORM mapping for the ``playlists`` table, one row per playlist.

    ``pid`` is the primary key and is the value ``Song.pid`` refers to.
    All remaining columns are plain integers that are copied verbatim by
    ``transfer_playlist``; nothing in this file validates or recomputes them.
    """
    __tablename__ = 'playlists'
    # Foreign key to slice_time.slice (nullable: no nullable=False is given).
    slice = Column(String, ForeignKey('slice_time.slice'))
    # Playlist id; the key used to look playlists up in transfer_playlist.
    pid = Column(Integer, primary_key=True)
    # Stored as an integer; presumably a Unix timestamp — TODO confirm units.
    modified_at = Column(Integer)
    # Per-playlist counters, named after what they count.
    num_tracks = Column(Integer)
    num_albums = Column(Integer)
    num_followers = Column(Integer)
    num_edits = Column(Integer)        # number of edits
    duration_ms = Column(Integer)      # total duration in milliseconds
    num_artists = Column(Integer)      # number of artists

class Song(Base):
    """ORM mapping for the ``song`` table: one row per track entry of a playlist.

    Rows are identified by the composite primary key
    ``(track_uri, pid, pos)`` declared in ``__table_args__``.
    """
    __tablename__ = 'song'
    # Part of the composite primary key.
    track_uri = Column(String, nullable=False)
    # Owning playlist; foreign key to playlists.pid and part of the primary key.
    pid = Column(Integer, ForeignKey('playlists.pid'), nullable=False)
    # Part of the primary key; presumably the track's position inside the
    # playlist — TODO confirm.
    pos = Column(Integer)
    # Descriptive artist/track/album columns, copied verbatim by transfer_playlist.
    artist_name = Column(String)
    artist_uri = Column(String)
    track_name = Column(String)
    album_uri = Column(String)
    duration_ms = Column(Integer)
    album_name = Column(String)
    
    # Composite primary key; the trailing empty dict is the (unused) slot for
    # table-level keyword options.
    __table_args__ = (
        PrimaryKeyConstraint('track_uri', 'pid', 'pos'),
        {},
    )

In [None]:
# --- Source database: the one playlists are read from ---------------------
# check_same_thread=False lets connections from this engine be used by
# threads other than the one that opened them (needed by the thread pool).
engine_original = create_engine('sqlite:///music.db', connect_args={'check_same_thread': False})
# Emit the tables declared on Base for this database.
Base.metadata.create_all(bind=engine_original)
Session_original = sessionmaker(engine_original)

# --- Target database: the smaller copy playlists are written to -----------
engine_new = create_engine('sqlite:///music_smaller_50000.db', connect_args={'check_same_thread': False})
Base.metadata.create_all(bind=engine_new)
Session_new = sessionmaker(engine_new)

In [None]:
def transfer_playlist(playlist_id):
    """Copy one playlist and its songs from the original database to the new one.

    The playlist is looked up by ``pid`` in the original database. If the new
    database has no playlist with that ``pid``, the playlist row and every
    ``Song`` row with the same ``pid`` are inserted and committed together;
    otherwise a message is printed and nothing is written.

    Any exception is caught and reported with ``print``; it is not re-raised.
    Both sessions are always closed before returning.

    Args:
        playlist_id: Value of ``Playlists.pid`` identifying the playlist to copy.

    Returns:
        None.
    """
    # Column names copied one-to-one from the source rows.
    playlist_fields = (
        'slice', 'pid', 'modified_at', 'num_tracks', 'num_albums',
        'num_followers', 'num_edits', 'duration_ms', 'num_artists',
    )
    song_fields = (
        'track_uri', 'pid', 'pos', 'artist_name', 'artist_uri',
        'track_name', 'album_uri', 'duration_ms', 'album_name',
    )

    # Each call gets its own pair of sessions.
    src = Session_original()
    dst = Session_new()

    try:
        # .one() raises when the pid is missing (or duplicated) in the source.
        source_row = src.query(Playlists).filter(Playlists.pid == playlist_id).one()
        print(f"Processing playlist ID {playlist_id}")

        # Guard clause: skip playlists that were already transferred.
        already_there = dst.query(Playlists).filter(Playlists.pid == source_row.pid).first()
        if already_there is not None:
            print(f"Playlist ID {playlist_id} already exists in the new database.")
            return

        dst.add(Playlists(**{name: getattr(source_row, name) for name in playlist_fields}))

        for track in src.query(Song).filter(Song.pid == source_row.pid).all():
            dst.add(Song(**{name: getattr(track, name) for name in song_fields}))

        # Playlist and songs are written in a single commit.
        dst.commit()
    except Exception as e:
        print(f"Error processing playlist ID {playlist_id}: {str(e)}")
    finally:
        src.close()
        dst.close()

def main(limit=10000, max_workers=20):
    """Copy up to ``limit`` playlists into the new database using a thread pool.

    The playlist ids are read from the original database first, then each id
    is handed to ``transfer_playlist`` on a worker thread. The function
    returns once every submitted transfer has finished.

    Args:
        limit: Maximum number of playlist ids to read from the original
            database. The query has no ORDER BY, so which playlists are
            selected is left to the database.
        max_workers: Number of worker threads in the pool.

    Returns:
        None.

    NOTE(review): the target file is named ``music_smaller_50000.db`` while the
    default limit is 10000 — confirm which size is intended.
    """
    session = Session_original()
    try:
        playlist_ids = session.query(Playlists.pid).limit(limit).all()
    finally:
        # Close the session even when the query fails, so it is not leaked.
        session.close()

    # Parallel processing; leaving the ``with`` block waits for all workers.
    # transfer_playlist reports its own errors, so results are not collected.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        executor.map(transfer_playlist, [row[0] for row in playlist_ids])


if __name__ == '__main__':
    main()

In [None]:
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, PrimaryKeyConstraint
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from concurrent.futures import ThreadPoolExecutor
import time

# Fresh declarative base for this cell: the model classes defined below
# register their tables on this Base.metadata.
Base = declarative_base()

# Define models
class SliceTime(Base):
    """ORM mapping for the ``slice_time`` table, keyed by ``slice``.

    ``Playlists.slice`` declares a foreign key that points at this table's
    ``slice`` column.
    """
    __tablename__ = 'slice_time'
    # Primary key; referenced by the foreign key on Playlists.slice.
    slice = Column(String, primary_key=True)
    # Stored as text; presumably when the slice was generated — TODO confirm format.
    generated_on = Column(String)
    # Stored as text; presumably a dataset/format version — TODO confirm.
    version = Column(String)

class Playlists(Base):
    """ORM mapping for the ``playlists`` table, one row per playlist.

    ``pid`` is the primary key and is the value ``Song.pid`` refers to.
    All remaining columns are plain integers that are copied verbatim by
    ``transfer_playlist``; nothing in this file validates or recomputes them.
    """
    __tablename__ = 'playlists'
    # Foreign key to slice_time.slice (nullable: no nullable=False is given).
    slice = Column(String, ForeignKey('slice_time.slice'))
    # Playlist id; the key used to look playlists up in transfer_playlist.
    pid = Column(Integer, primary_key=True)
    # Stored as an integer; presumably a Unix timestamp — TODO confirm units.
    modified_at = Column(Integer)
    # Per-playlist counters, named after what they count.
    num_tracks = Column(Integer)
    num_albums = Column(Integer)
    num_followers = Column(Integer)
    num_edits = Column(Integer)  # number of edits
    duration_ms = Column(Integer)  # total duration in milliseconds
    num_artists = Column(Integer)  # number of artists

class Song(Base):
    """ORM mapping for the ``song`` table: one row per track entry of a playlist.

    Rows are identified by the composite primary key
    ``(track_uri, pid, pos)`` declared in ``__table_args__``.
    """
    __tablename__ = 'song'
    # Part of the composite primary key.
    track_uri = Column(String, nullable=False)
    # Owning playlist; foreign key to playlists.pid and part of the primary key.
    pid = Column(Integer, ForeignKey('playlists.pid'), nullable=False)
    # Part of the primary key; presumably the track's position inside the
    # playlist — TODO confirm.
    pos = Column(Integer)
    # Descriptive artist/track/album columns, copied verbatim by transfer_playlist.
    artist_name = Column(String)
    artist_uri = Column(String)
    track_name = Column(String)
    album_uri = Column(String)
    duration_ms = Column(Integer)
    album_name = Column(String)
    # Composite primary key; the trailing empty dict is the (unused) slot for
    # table-level keyword options.
    __table_args__ = (
        PrimaryKeyConstraint('track_uri', 'pid', 'pos'),
        {},
    )

# Database engines. check_same_thread=False lets connections from these
# engines be used by threads other than the one that opened them, which the
# thread pool in main() relies on.
engine_original = create_engine('sqlite:///music.db', connect_args={'check_same_thread': False})
engine_new = create_engine('sqlite:///music_smaller_50000.db', connect_args={'check_same_thread': False})

# Emit the tables declared on Base in both the source and the target database.
Base.metadata.create_all(bind=engine_original)
Base.metadata.create_all(bind=engine_new)

# Session factories: one bound to the source, one bound to the target.
Session_original = sessionmaker(engine_original)
Session_new = sessionmaker(engine_new)

def transfer_playlist(playlist_id):
    """Copy one playlist and its songs from the original database to the new one.

    The playlist is looked up by ``pid`` in the original database. If the new
    database has no playlist with that ``pid``, the playlist row and every
    ``Song`` row with the same ``pid`` are inserted and committed together;
    otherwise a message is printed and nothing is written.

    Any exception is caught and reported with ``print``; it is not re-raised.
    Both sessions are always closed before returning.

    Args:
        playlist_id: Value of ``Playlists.pid`` identifying the playlist to copy.

    Returns:
        None.
    """
    # Column names copied one-to-one from the source rows.
    playlist_fields = (
        'slice', 'pid', 'modified_at', 'num_tracks', 'num_albums',
        'num_followers', 'num_edits', 'duration_ms', 'num_artists',
    )
    song_fields = (
        'track_uri', 'pid', 'pos', 'artist_name', 'artist_uri',
        'track_name', 'album_uri', 'duration_ms', 'album_name',
    )

    # Each call gets its own pair of sessions.
    reader = Session_original()
    writer = Session_new()
    try:
        # .one() raises when the pid is missing (or duplicated) in the source.
        found = reader.query(Playlists).filter(Playlists.pid == playlist_id).one()
        print(f"Processing playlist ID {playlist_id}")

        # Guard clause: skip playlists that were already transferred.
        duplicate = writer.query(Playlists).filter(Playlists.pid == found.pid).first()
        if duplicate is not None:
            print(f"Playlist ID {playlist_id} already exists in the new database.")
            return

        playlist_values = {column: getattr(found, column) for column in playlist_fields}
        writer.add(Playlists(**playlist_values))

        source_songs = reader.query(Song).filter(Song.pid == found.pid).all()
        for entry in source_songs:
            song_values = {column: getattr(entry, column) for column in song_fields}
            writer.add(Song(**song_values))

        # Playlist and songs are written in a single commit.
        writer.commit()
    except Exception as e:
        print(f"Error processing playlist ID {playlist_id}: {str(e)}")
    finally:
        reader.close()
        writer.close()

def main(limit=8000, max_workers=10):
    """Copy up to ``limit`` playlists into the new database using a thread pool.

    The playlist ids are read from the original database first, then each id
    is handed to ``transfer_playlist`` on a worker thread. The function
    returns once every submitted transfer has finished.

    Args:
        limit: Maximum number of playlist ids to read from the original
            database. The query has no ORDER BY, so which playlists are
            selected is left to the database.
        max_workers: Number of worker threads in the pool.

    Returns:
        None.

    NOTE(review): the target file is named ``music_smaller_50000.db`` while the
    default limit is 8000 — confirm which size is intended.
    """
    session = Session_original()
    try:
        playlist_ids = session.query(Playlists.pid).limit(limit).all()
    finally:
        # Close the session even when the query fails, so it is not leaked.
        session.close()

    # Leaving the ``with`` block waits for all workers to finish.
    # transfer_playlist reports its own errors, so results are not collected.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        executor.map(transfer_playlist, [row[0] for row in playlist_ids])


if __name__ == '__main__':
    main()
