In [1]:
import pandas as pd
import ast

In [2]:
# Categories of the NPO catalogue that get combined into one dataset.
categories = ["Programmas", "Series", "Documentaires", "Films"]

# Empty frame that fixes the column layout of the combined dataset.
final_results = pd.DataFrame(
    columns=[
        "Name",
        "Description",
        "Category",
        "URL",
        "Small_Image",
        "Large_Image",
        "Tags",
    ]
)

In [3]:
"""This code processes a set of CSV files, each corresponding to a category of items on the NPO platform,
and concatenates them into a single dataframe called final_results."""

# Cleaned frame of each category, in the order the categories are listed.
# They are combined once after the loop instead of growing final_results on
# every iteration, so re-running this cell does not duplicate rows.
cleaned_frames = []

for category in categories:
    df = pd.read_csv("{}_complete.csv".format(category))

    # Remove the "Unnamed: 0" column (presumably a row index written by an
    # earlier export -- TODO confirm); tolerate files that do not have it.
    df = df.drop(columns="Unnamed: 0", errors="ignore")

    # Drop rows that do not have a description
    df = df.dropna(subset=["Description"])

    # Tags are stored as the text form of a Python list: parse each cell back
    # into a list and discard tags that contain the number of seasons.
    # assign() returns a new frame, which avoids SettingWithCopyWarning.
    df = df.assign(
        Tags=df["Tags"].apply(
            lambda raw: [tag for tag in ast.literal_eval(raw) if "seizoen" not in tag]
        )
    )

    # Drop rows that are left with too few tags.
    # NOTE(review): "Programmas" rows must keep at least two tags while every
    # other category needs only one; the reason is not documented -- confirm.
    min_tags = 2 if category == "Programmas" else 1
    df = df[df["Tags"].str.len() >= min_tags]

    cleaned_frames.append(df)

# Combine everything in one concat. The frames are reversed so the last
# category listed comes first, matching the row order produced by prepending
# each category to the running result.
if cleaned_frames:
    final_results = pd.concat(cleaned_frames[::-1], ignore_index=True)

In [4]:
# Show the combined dataframe in the notebook output for a visual check.
display(final_results)

Unnamed: 0,Name,Description,Category,URL,Small_Image,Large_Image,Tags
0,Wallace & Gromit,Engelse animatiefilm. Wallace en Gromit runnen...,Films,https://www.npostart.nl/wallace-gromit/21-12-2...,https://weserv.moviemeter.nl/?url=https://www....,https://weserv.moviemeter.nl/?url=https://www....,"[Animatie, Komedie]"
1,Sissi,Op 16-jarige leeftijd ontmoet Sissi de keizer ...,Films,https://www.npostart.nl/sissi/25-12-2008/POW_0...,https://weserv.moviemeter.nl/?url=https://www....,https://weserv.moviemeter.nl/?url=https://www....,"[Drama, Romantiek]"
2,"Sissi, die junge Kaiserin",Sissi heeft het onbezorgde leventje moeten inr...,Films,https://www.npostart.nl/sissi-die-junge-kaiser...,https://weserv.moviemeter.nl/?url=https://www....,https://weserv.moviemeter.nl/?url=https://www....,"[Drama, Historisch]"
3,Charlie & Lola specials,Specials over de avonturen van Charlie en Lola.,Films,https://www.npostart.nl/charlie-lola-specials/...,https://weserv.moviemeter.nl/?url=https://www....,https://weserv.moviemeter.nl/?url=https://www....,"[Misdaad, Drama]"
4,"Sissi, Schicksalsjahre einer Kaiserin",Franz Joseph en Sissi zijn noodgedwongen regel...,Films,https://www.npostart.nl/sissi-schicksalsjahre-...,https://weserv.moviemeter.nl/?url=https://www....,https://weserv.moviemeter.nl/?url=https://www....,"[Drama, Historisch]"
...,...,...,...,...,...,...,...
1688,Fight or Flight,Sahil Amar Aïssa portretteert twee mensen die ...,Programmas,https://www.npostart.nl/fight-or-flight/BV_101...,https://images.npo.nl/tile/320x180/1903091.jpg,https://images.npo.nl/tile/320x180/1903091.jpg,"[Actie, Thriller]"
1689,Flikken Maastricht,"Moord en doodslag, chantagepraktijken, verdwij...",Programmas,https://www.npostart.nl/flikken-maastricht/AT_...,https://images.npo.nl/tile/320x180/Flikken_Maa...,https://images.npo.nl/tile/320x180/Flikken_Maa...,"[Misdaad, Drama]"
1690,2Doc,Alle documentaires die onder de noemer 2Doc: u...,Programmas,https://www.npostart.nl/2doc/POMS_S_VPRO_472240,https://images.npo.nl/tile/320x180/V3_nacht_cd...,https://images.npo.nl/tile/320x180/V3_nacht_cd...,"[Komedie, Romantiek]"
1691,NOS Journaal,"Het laatste nieuws, gebeurtenissen van nationa...",Programmas,https://www.npostart.nl/nos-journaal/NOSJournaal,https://images.npo.nl/tile/320x180/nos_journaa...,https://images.npo.nl/tile/320x180/nos_journaa...,"[Drama, Romantiek]"


In [5]:
# How many items ended up in each category of the combined dataset
category_groups = final_results.groupby("Category")["Category"]
category_groups.count()

Category
Documentaires    193
Films            308
Programmas       930
Series           262
Name: Category, dtype: int64

In [6]:
# Write the combined dataset to disk; index=False leaves the row index out of the file.
output_csv = "Final_NPO_Data.csv"
final_results.to_csv(output_csv, index=False)