In [1]:
import os
import pathlib

import django
import datetime
import dotenv
import pandas
from django.conf import settings

In [2]:
BASE_DIR = pathlib.Path('.')

In [3]:
dotenv.load_dotenv(BASE_DIR / '.env')

True

In [4]:
try:
    settings.configure(**{
        'DEBUG': True,
        'BASE_DIR': BASE_DIR,
        'INSTALLED_APPS': [
            'songs'
        ],
        'DATABASES': {
            'default': {
                'ENGINE': 'django.db.backends.sqlite3',
                'NAME': BASE_DIR / 'db.sqlite3',
            }
        },
        'MEDIA_PATH': BASE_DIR / 'media'
    })
except:
    pass

In [5]:
os.environ.setdefault('DJANGO_ALLOW_ASYNC_UNSAFE', 'true')

'true'

In [6]:
try:
    django.setup()
except:
    pass

## Songs

In [7]:
from songs.models import Song

In [36]:
songs = Song.objects.filter(year=0).values_list('id', 'artist__name', 'name', 'year')

In [37]:
songs_df = pandas.DataFrame(songs, columns=['id', 'artist', 'name', 'year'])

In [38]:
songs_df.rename(columns={'id': 'dbid'}, inplace=True)

In [39]:
songs_df.name.describe()

count            69
unique           69
top       The Curse
freq              1
Name: name, dtype: object

In [40]:
songs_df.sort_values('name', inplace=True)

In [41]:
songs_df.head()

Unnamed: 0,dbid,artist,name,year
63,553,Umngan'wami,Babes,0
36,382,Moriarty Meets Mama Rosin,Calypso Triste,0
44,411,Ouais Chery,Collabo,0
33,361,Mesparrow,I Don't Want To Grow Up,0
27,254,Jiva Phez'kombhede,Jiva Phez'kombhede,0


In [42]:
songs_df.to_csv('songs-to-enrich.csv')

In [43]:
ENRICHMENT_FILE = '/Users/johnpendenque/Downloads/songs.csv'

In [44]:
enriched_songs = pandas.read_csv(ENRICHMENT_FILE)

In [45]:
enriched_songs.head()

Unnamed: 0.1,Unnamed: 0,dbid,artist,name,year
0,63,553,Umngan'wami,Babes,0
1,36,382,Moriarty Meets Mama Rosin,Calypso Triste,2011
2,44,411,Ouais Chery,Collabo,0
3,33,361,Mesparrow,I Don't Want To Grow Up,2013
4,27,254,Jiva Phez'kombhede,Jiva Phez'kombhede,0


In [46]:
for item in  enriched_songs.itertuples():
    song = Song.objects.get(id=item.dbid)
    song.year = item.year
    song.save()

## Artist

In [8]:
from songs.models import Artist

In [9]:
artist = Artist.objects.get(id=123)

In [76]:
qs_artists = Artist.objects.filter(wikipedia_page='nan').values_list(
    'name', 'is_group', 'date_of_birth', 'wikipedia_page')

In [77]:
df_artists = pandas.DataFrame(list(qs_artists), columns=[
                              'name', 'is_group', 'date_of_birth', 'wikipedia_page'])

In [78]:
df_artists.describe()

Unnamed: 0,name,is_group,date_of_birth,wikipedia_page
count,62,62,24,62.0
unique,62,2,24,1.0
top,1T1,False,1997-06-15,
freq,1,56,1,62.0


In [79]:
df_artists.to_csv('artists_no_dob_wiki.csv', index=False)

In [70]:
ENRICHMENT_FILE = '/Users/johnpendenque/Downloads/enrich.csv'

In [58]:
df_enrichment = pandas.read_csv(ENRICHMENT_FILE)

In [59]:
df_enrichment.head()

Unnamed: 0,name,is_group,date_of_birth,wikipedia_page
0,1T1,False,,
1,Alexander,False,,
2,Bach,False,,
3,Bali Baby,False,1997-06-15,
4,BazBaz,False,,


In [60]:
from pandas import isna


for item in df_enrichment.itertuples():
    try:
        artist = Artist.objects.get(name=item.name)
    except:
        continue
    else:
        if isinstance(item.date_of_birth, str):
            try:
                d = datetime.datetime.strptime(item.date_of_birth, '%d-%m-%Y').date()
            except:
                d = datetime.datetime.strptime(item.date_of_birth, '%Y-%m-%d').date()

            artist.date_of_birth = d

        artist.wikipedia_page = item.wikipedia_page
        artist.is_group = item.is_group
        artist.save()