In [1]:
import pandas as pd

In [2]:
def rename_cols(df: pd.DataFrame, prefix: str) -> None:
    """Prepend ``prefix`` plus an underscore to every column label, in place.

    The frame passed in is modified directly; nothing is returned.

    Args:
        df (pd.DataFrame): frame whose column labels are rewritten
        prefix (str): text placed in front of each label, joined with "_"
    """
    df.columns = [f"{prefix}_{label}" for label in df.columns.to_list()]

In [3]:
# Load the events file; lines=True makes pandas parse one JSON object per line.
df = pd.read_json("../data/2015-01-01-15.json", lines=True)

In [4]:
# Discard the columns that are not used further down.
df = df.drop(columns=["payload", "org"])

# Expand the "actor" column into flat columns, drop "gravatar_id",
# and prefix the remaining labels with "actor_".
actor = pd.json_normalize(df["actor"]).drop(columns="gravatar_id")
rename_cols(actor, "actor")

# Same expansion for "repo", prefixed with "repo_".
repo = pd.json_normalize(df["repo"])
rename_cols(repo, "repo")

# Attach the flattened columns by index, then remove the original
# "actor" and "repo" columns they were derived from.
df = df.join([actor, repo]).drop(columns=["actor", "repo"])

In [5]:
# Display the frame as the cell output to check the new actor_* / repo_* columns.
df

Unnamed: 0,id,type,public,created_at,actor_id,actor_login,actor_url,actor_avatar_url,repo_id,repo_name,repo_url
0,2489651045,CreateEvent,True,2015-01-01 15:00:00+00:00,665991,petroav,https://api.github.com/users/petroav,https://avatars.githubusercontent.com/u/665991?,28688495,petroav/6.828,https://api.github.com/repos/petroav/6.828
1,2489651051,PushEvent,True,2015-01-01 15:00:01+00:00,3854017,rspt,https://api.github.com/users/rspt,https://avatars.githubusercontent.com/u/3854017?,28671719,rspt/rspt-theme,https://api.github.com/repos/rspt/rspt-theme
2,2489651053,PushEvent,True,2015-01-01 15:00:01+00:00,6339799,izuzero,https://api.github.com/users/izuzero,https://avatars.githubusercontent.com/u/6339799?,28270952,izuzero/xe-module-ajaxboard,https://api.github.com/repos/izuzero/xe-module...
3,2489651057,WatchEvent,True,2015-01-01 15:00:03+00:00,6894991,SametSisartenep,https://api.github.com/users/SametSisartenep,https://avatars.githubusercontent.com/u/6894991?,2871998,visionmedia/debug,https://api.github.com/repos/visionmedia/debug
4,2489651062,PushEvent,True,2015-01-01 15:00:03+00:00,485033,winterbe,https://api.github.com/users/winterbe,https://avatars.githubusercontent.com/u/485033?,28593843,winterbe/streamjs,https://api.github.com/repos/winterbe/streamjs
...,...,...,...,...,...,...,...,...,...,...,...
11346,2489678838,IssuesEvent,True,2015-01-01 15:59:59+00:00,1105372,ophian,https://api.github.com/users/ophian,https://avatars.githubusercontent.com/u/1105372?,2627116,s9y/Serendipity,https://api.github.com/repos/s9y/Serendipity
11347,2489678840,IssuesEvent,True,2015-01-01 15:59:59+00:00,9343331,No-CQRT,https://api.github.com/users/No-CQRT,https://avatars.githubusercontent.com/u/9343331?,25600089,No-CQRT/GooGuns,https://api.github.com/repos/No-CQRT/GooGuns
11348,2489678842,DeleteEvent,True,2015-01-01 15:59:59+00:00,2057932,LipkeGu,https://api.github.com/users/LipkeGu,https://avatars.githubusercontent.com/u/2057932?,24847217,LipkeGu/OpenRA,https://api.github.com/repos/LipkeGu/OpenRA
11349,2489678843,IssuesEvent,True,2015-01-01 15:59:59+00:00,9343331,No-CQRT,https://api.github.com/users/No-CQRT,https://avatars.githubusercontent.com/u/9343331?,25600089,No-CQRT/GooGuns,https://api.github.com/repos/No-CQRT/GooGuns


In [6]:
# Remove rows containing any missing value, then rows that are exact duplicates.
# `df` is rebound to a new frame instead of using inplace=True, so the frame
# object displayed by the previous cell is not modified behind its output.
df = df.dropna().drop_duplicates()
df

Unnamed: 0,id,type,public,created_at,actor_id,actor_login,actor_url,actor_avatar_url,repo_id,repo_name,repo_url
0,2489651045,CreateEvent,True,2015-01-01 15:00:00+00:00,665991,petroav,https://api.github.com/users/petroav,https://avatars.githubusercontent.com/u/665991?,28688495,petroav/6.828,https://api.github.com/repos/petroav/6.828
1,2489651051,PushEvent,True,2015-01-01 15:00:01+00:00,3854017,rspt,https://api.github.com/users/rspt,https://avatars.githubusercontent.com/u/3854017?,28671719,rspt/rspt-theme,https://api.github.com/repos/rspt/rspt-theme
2,2489651053,PushEvent,True,2015-01-01 15:00:01+00:00,6339799,izuzero,https://api.github.com/users/izuzero,https://avatars.githubusercontent.com/u/6339799?,28270952,izuzero/xe-module-ajaxboard,https://api.github.com/repos/izuzero/xe-module...
3,2489651057,WatchEvent,True,2015-01-01 15:00:03+00:00,6894991,SametSisartenep,https://api.github.com/users/SametSisartenep,https://avatars.githubusercontent.com/u/6894991?,2871998,visionmedia/debug,https://api.github.com/repos/visionmedia/debug
4,2489651062,PushEvent,True,2015-01-01 15:00:03+00:00,485033,winterbe,https://api.github.com/users/winterbe,https://avatars.githubusercontent.com/u/485033?,28593843,winterbe/streamjs,https://api.github.com/repos/winterbe/streamjs
...,...,...,...,...,...,...,...,...,...,...,...
11346,2489678838,IssuesEvent,True,2015-01-01 15:59:59+00:00,1105372,ophian,https://api.github.com/users/ophian,https://avatars.githubusercontent.com/u/1105372?,2627116,s9y/Serendipity,https://api.github.com/repos/s9y/Serendipity
11347,2489678840,IssuesEvent,True,2015-01-01 15:59:59+00:00,9343331,No-CQRT,https://api.github.com/users/No-CQRT,https://avatars.githubusercontent.com/u/9343331?,25600089,No-CQRT/GooGuns,https://api.github.com/repos/No-CQRT/GooGuns
11348,2489678842,DeleteEvent,True,2015-01-01 15:59:59+00:00,2057932,LipkeGu,https://api.github.com/users/LipkeGu,https://avatars.githubusercontent.com/u/2057932?,24847217,LipkeGu/OpenRA,https://api.github.com/repos/LipkeGu/OpenRA
11349,2489678843,IssuesEvent,True,2015-01-01 15:59:59+00:00,9343331,No-CQRT,https://api.github.com/users/No-CQRT,https://avatars.githubusercontent.com/u/9343331?,25600089,No-CQRT/GooGuns,https://api.github.com/repos/No-CQRT/GooGuns
