In [2]:
import os
import json
import glob
import pandas as pd

In [3]:
# Show full cell contents (e.g. long plot summaries) instead of truncating them.
pd.options.display.max_colwidth = None

In [4]:
folder_path = "input/international-movies-json"


def flatten_movie(movie):
    """Collapse the nested fields of one movie record into plain strings.

    * ``cast``  -- list of ``{"name": ...}`` dicts -> ``"A, B, C"``
    * ``director`` -- ``{"name": ...}`` dict -> ``"Name"``
    * ``genre`` -- list of strings -> ``"Drama, Comedy"``

    A missing key or an explicit JSON ``null`` yields ``""``. A field that is
    already a string is left untouched, so applying the function twice is a
    no-op (and a string genre is never split into single characters).

    The record is modified in place and also returned.
    """
    cast = movie.get("cast")
    if not isinstance(cast, str):
        # Skip malformed cast entries (non-dicts, entries without a name).
        movie["cast"] = ", ".join(
            actor["name"]
            for actor in (cast or [])
            if isinstance(actor, dict) and actor.get("name")
        )

    director = movie.get("director")
    if isinstance(director, dict):
        movie["director"] = director.get("name") or ""
    elif not isinstance(director, str):
        movie["director"] = ""

    genre = movie.get("genre")
    if not isinstance(genre, str):
        movie["genre"] = ", ".join(genre or [])

    return movie


def load_movies(paths):
    """Load and flatten the movie records stored in the given JSON files.

    Each file is expected to hold a JSON array of movie objects. A file that
    cannot be read, decoded or flattened is reported on stdout and skipped as
    a whole, so the result never contains a partially loaded file.

    Returns a list of flattened record dicts, in the order of ``paths``.
    """
    records = []
    for file_path in paths:
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                file_data = json.load(file)
            # Flatten the whole file first; only extend once it fully succeeded.
            records.extend([flatten_movie(movie) for movie in file_data])

        except json.JSONDecodeError as e:
            print(f"Error decoding JSON in {file_path}: {e}")

        except Exception as e:
            print(f"An error occurred while processing {file_path}: {e}")

    return records


# glob returns files in filesystem-dependent order; sort so that the row order
# (and therefore the DataFrame index) is reproducible between runs.
json_files = sorted(glob.glob(os.path.join(folder_path, "*.json")))

data = load_movies(json_files)

# errors="ignore": do not fail when nothing was loaded or a column is absent.
df = pd.DataFrame(data).drop(
    columns=['_id', 'poster_url', 'certificate'], errors="ignore"
)
df.head()



Unnamed: 0,ImdbId,name,year,runtime,genre,ratingValue,summary_text,ratingCount,director,cast
0,tt0144999,"Isaia, horeve",1966,72 min,Comedy,4.9,A spirited wife tries to keep her husband's matchmaking and introduction agency afloat--while in the meantime--her daughter is ready to walk down the aisle with an affluent young heir. Have they struck gold or is this a beautiful dream?,66,Kostas Asimakopoulos,"Vasilis Avlonitis, Georgia Vasileiadou, Takis Miliadis, Giannis Fermis"
1,tt0142966,Voi juku - mikä lauantai,1979,81 min,Comedy,4.3,Add a Plot,66,Visa Mäkinen,"Matti Pulliainen, Erkki Liikanen, Pirjo Siiskonen, Riitta Käpynen, Timo Nissi"
2,tt0142887,Syöksykierre,1981,90 min,Drama,5.2,Add a Plot,66,Tapio Suominen,"Kimmo Liukkonen, Markku Toikka, Kai Honkanen, Albert Liukkonen"
3,tt0142383,Le huitième jour,1960,78 min,Drama,6.4,"Françoise, a thirty-year-old single typist lives only for her Sunday release. On this day, she attends a horse race under pretext of perfecting her taste for elegance and refinement. Georges, a neighbor widower, begins to court her.",66,Marcel Hanoun,"Emmanuelle Riva, Félix Marten, José Varela, Lucienne Bogaert"
4,tt0142346,Half Way to Hell,1960,75 min,"Action, Romance, Western",2.8,"Prior to the revolution, Senorita Maria San Carlos, the only daughter of a wealthy landowner, was betrothed to Escobar, a General, but Maria believes in marriage for love and runs away with... See full summary »",66,Victor Adamson,"Al Adamson, Al Adamson, Lyle Felice, Caroll Montour, Sergio Virel"


In [5]:
# Spot-check a single title by exact name match.
df[df["name"].eq("Redlight Greenlight")]

Unnamed: 0,ImdbId,name,year,runtime,genre,ratingValue,summary_text,ratingCount,director,cast
628222,tt1130968,Redlight Greenlight,2010,90 min,Drama,,"After 10 years of toil, a moral, determined and penniless agent trainee is given his only and last opportunity to make agent. Standing in his way; a 24 hour time frame, Hollywood's most ... See full summary »",,Sean Simpson,"David Zalkind, Theresa Barbosa-Adams, Jordan Gwynn Colton, Chris Facey"


In [6]:
# Persist the flattened movie table for downstream use.
df.to_parquet(path="movies.parquet")