In [5]:
import json
import numpy as np
import pandas as pd
import time

In [6]:
def from_jsonl_to_df(path):
    t_start = time.time()
    with open(path, 'r') as json_file:
        json_list = list(json_file)
    json_list = [json.loads(json_str) for json_str in json_list]
    df = pd.DataFrame.from_dict(json_list, orient='columns')
    
    print("DataFrame of {}".format(len(df))+" rows loaded in {:.2f} sec".format(time.time()-t_start))
    return df

def from_csv_to_df(path):
    t_start=time.time()
    if "followers" in path:
        df=pd.read_csv(path, names=["follower", "person followed", "timestamp"], header=None)
    else:
        df=pd.read_csv(path)
    print("DataFrame of {}".format(len(df))+" rows loaded in {:.2f} sec".format(time.time()-t_start))
    return df

In [7]:
df_keaks = from_jsonl_to_df("data/full/keaks.jsonl")

DataFrame of 274472 rows loaded in 4.01 sec


In [20]:
df_beats =from_jsonl_to_df("data/full/beats.jsonl")

DataFrame of 59761 rows loaded in 1.76 sec


In [8]:
df_users =from_jsonl_to_df("data/full/users.jsonl")

DataFrame of 1162572 rows loaded in 34.25 sec


In [9]:
df_followers = from_csv_to_df("data/full/followers.csv")

DataFrame of 10506442 rows loaded in 13.36 sec


In [10]:
df_audiences = from_csv_to_df("data/full/audiences.csv")

DataFrame of 28952275 rows loaded in 35.94 sec


In [8]:
df_keaks.head(2)

Unnamed: 0,keakId,createdAt,likeCount,commentCount,viewCount,averageViewProgress,duration,hashtags,contentType,hasSmallThumbnail,lien
0,17301813623064450175,2019-10-10T21:20:59.2970322Z,4,2,42,7.8,107.0,"[91, 1, Rap, Pen, freestyle2019, Trap2K19]",freestyle,True,https://www.keakr.com/fr/keak/mon-mec-rap-1
1,6202649352,2018-04-02T15:06:34.9851924Z,2,2,53,0.0,68.0,[],freestyle,True,https://www.keakr.com/fr/keak/petit-salaire


In [12]:
df_audiences.head(2)

Unnamed: 0,userId,contentId,timestamp,progress,liked,commented,shared
0,users/6512051967,keaks/17301813623657913783,2020-01-01T00:00:00.4081666,0,True,True,True
1,users/17301813623701852659,keaks/17301813623464753700,2020-01-01T00:00:00.8245932,0,False,False,False


In [13]:
df_followers.head(2)

Unnamed: 0,follower,person followed,timestamp
0,users/17301813624860662494,users/17301813625195354624,2020-06-13T16:53:19.7077441Z
1,users/11928492392,users/12680019689,2019-04-11T22:23:51.9010202Z


In [14]:
df_beats.head(2)

Unnamed: 0,beatId,genres,moods,nbKeaks,nbLikes,beatmakerId,duration,bpm,createdAt,updatedAt,link,licenceType
0,17301813628927249101,"[{'id': '9920897543', 'name': 'Trap'}]","[{'id': '17301813622132424287', 'name': 'Dark'...",7,4,17301813625134492069,121.0,102.0,2021-09-20T18:16:47.1420645Z,2021-09-20T18:17:00.2684235Z,https://keakr.com/fr/beat/turquoiz,[free]
1,17301813627622625982,"[{'id': '9920897543', 'name': 'Trap'}]",[],1,5,17301813627622569830,163.0,125.0,2021-03-31T15:20:10.726762Z,2021-03-31T15:20:26.5760601Z,https://keakr.com/fr/beat/moula-i,[free]


In [15]:
df_users.head(2)

Unnamed: 0,userId,createdAt,lastConnection,usedGenres,listenedGenres,battleCreatedCount,battleLostCount,battleRespondedCount,battleWonCount,friendCount,...,mutualFollowCount,overallBeatUsage,PlaylistCount,prizeMoneyParticipationCount,prizeMoneyWinner,sessionCount,shareCount,viewCount,isBeatmaker,isSinger
0,12354401148,2019-03-02T10:23:33.0903001Z,2019-03-02T10:23:44.3283191Z,[],[],0,0,0,0,0,...,0.0,0.0,,0,False,2.0,,0,False,False
1,12354411487,2019-03-02T10:26:23.9016307Z,2020-09-24T06:16:46.8631984Z,[],[],0,0,0,0,0,...,,0.0,,0,False,8.0,,0,False,False


In [17]:
list(df_users.columns)

['userId',
 'createdAt',
 'lastConnection',
 'usedGenres',
 'listenedGenres',
 'battleCreatedCount',
 'battleLostCount',
 'battleRespondedCount',
 'battleWonCount',
 'friendCount',
 'keakCount',
 'keakrCoinGiven',
 'keakCoinReceived',
 'likeCount',
 'likeGivenCount',
 'mutualFollowCount',
 'overallBeatUsage',
 'PlaylistCount',
 'prizeMoneyParticipationCount',
 'prizeMoneyWinner',
 'sessionCount',
 'shareCount',
 'viewCount',
 'isBeatmaker',
 'isSinger']

In [12]:
df_keaks.tail(20)

Unnamed: 0,keakId,createdAt,likeCount,commentCount,viewCount,averageViewProgress,duration,hashtags,contentType,hasSmallThumbnail,lien
274452,11251947163,2018-11-30T20:31:54.8704002Z,3,1,184,0.0,,[freesstyle],,,https://www.keakr.com/fr/keak/1er-freestyle-3_
274453,17301813628946757093,2021-09-24T05:31:55.3098249Z,82,15,3981,15.43,31.0,[],importedVideo,True,https://www.keakr.com/fr/keak/inspi-du-jour-c
274454,17301813626938593807,2020-12-15T15:53:30.3573302Z,122,9,3197,11.12,109.0,[4],importedVideo,True,https://www.keakr.com/fr/keak/freestyle-zi2
274455,17301813627835970314,2021-04-23T03:11:10.3089155Z,2,17,16,0.0,,[],live,,https://www.keakr.com/fr/keak/tapping-pads-an
274456,17301813629970656511,2022-02-05T17:34:45.1980239Z,409,35,20,0.0,,[],live,,https://www.keakr.com/fr/keak/what-u-think-ab
274457,17301813629989180861,2022-02-09T07:20:42.5728099Z,1,1,3,,41.0,"[rap, freestyle, boombap, oldschool, 2022, ren...",importedVideo,True,https://www.keakr.com/fr/keak/freestyle-v4x
274458,11770037098,2019-01-18T04:24:00.6728086Z,158,32,5115,9.19,197.0,[],freestyle,True,https://www.keakr.com/fr/keak/mister-lonley
274459,17301813621722253695,2019-05-16T19:19:33.299397Z,0,1,102,0.0,126.0,[30block],freestyle,True,https://www.keakr.com/fr/keak/30blk
274460,17301813629879180579,2022-01-21T17:59:24.7350355Z,11,8,38,2.09,169.0,[DRILL],freestyle,True,https://www.keakr.com/fr/keak/nefertiti
274461,17301813629983640630,2022-02-08T05:54:01.2413765Z,3,1,9,0.0,66.0,[],freestyle,True,https://www.keakr.com/fr/keak/ft-avec-noah


In [18]:
df_keaks.hashtags.tail(30)

274442                      [bitch, music, freestyle, girl]
274443          [rapper, singer, matou, rap, freestyle, us]
274444                                                   []
274445                                                   []
274446                                                   []
274447                                                   []
274448                                              [gotit]
274449                                                   []
274450    [BLM, hiphop, hiphopartist, femalerapper, Lyri...
274451                                                   []
274452                                         [freesstyle]
274453                                                   []
274454                                                  [4]
274455                                                   []
274456                                                   []
274457    [rap, freestyle, boombap, oldschool, 2022, ren...
274458                                  

In [25]:
df_beats.genres.tail(20)

59741               [{'id': '9920897543', 'name': 'Trap'}]
59742    [{'id': '9920897532', 'name': 'Melancolic'}, {...
59743    [{'id': '17301813623043473093', 'name': 'Typeb...
59744               [{'id': '9920897543', 'name': 'Trap'}]
59745    [{'id': '17301813621692872888', 'name': 'Drill'}]
59746    [{'id': '17301813621692872888', 'name': 'Drill'}]
59747           [{'id': '9920897519', 'name': 'Boom bap'}]
59748               [{'id': '9920897543', 'name': 'Trap'}]
59749           [{'id': '9920897519', 'name': 'Boom bap'}]
59750    [{'id': '9920897521', 'name': 'Chill'}, {'id':...
59751               [{'id': '9920897543', 'name': 'Trap'}]
59752    [{'id': '17301813623043473093', 'name': 'Typeb...
59753    [{'id': '17301813621692872896', 'name': 'Regga...
59754    [{'id': '9920897526', 'name': 'Electro'}, {'id...
59755               [{'id': '9920897543', 'name': 'Trap'}]
59756               [{'id': '9920897543', 'name': 'Trap'}]
59757           [{'id': '9920897519', 'name': 'Boom bap'

In [44]:
#df_beats.genres.value_counts()
values_of_key = [i['name'] for i in df_beats.genres]
values_of_key

TypeError: list indices must be integers or slices, not str

In [43]:
list_of_dicts = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}, {"a": 7, "b": 8, "c": 9}]
#a_key = "c"

values_of_key = [i['c'] for i in list_of_dicts]
values_of_key

[3, 6, 9]