In [None]:
import pandas as pd

In [None]:
# Load the raw movie listing. YEAR is read as text rather than parsed as a
# date: the cleaning step later in this file applies `.str` operations to
# the column, which only work on string data.
df_movies = pd.read_csv('movies.csv', dtype={'YEAR': str})
display(df_movies)

In [None]:
# Print column names, non-null counts and dtypes for a quick overview.
df_movies.info()

In [None]:
# Count the missing values in each column.
df_movies.isnull().sum()

In [None]:
def drop_null(data, col_name):
    """
    Drop the rows of a pandas dataframe that have a null in the given
    column(s).

    Parameters:
    data (pandas.DataFrame): The dataframe to filter. It is not modified.
    col_name (str or list of str): A single column name, or a list of column
        names; a row is dropped if any of these columns is null.

    Returns:
    pandas.DataFrame: A new dataframe without the rows that had nulls in the
        specified column(s).
    """
    # Older pandas releases require `subset` to be list-like and would treat
    # a bare string as a sequence of characters, so wrap scalars explicitly.
    subset = list(col_name) if pd.api.types.is_list_like(col_name) else [col_name]
    return data.dropna(subset=subset)
# Drop every row that is missing any of these fields. Passing the whole list
# filters in a single pass instead of one dataframe copy per column.
cols = ['YEAR', 'GENRE', 'RunTime']
df_movies = drop_null(df_movies, cols)

In [None]:
def fill_nulls(df, columns, default_value):
    """
    Replace the missing values of selected columns with one constant.

    The dataframe is updated in place, column by column, and the same
    object is handed back so the call can be used in an assignment.

    Parameters:
    df (pandas.DataFrame): The dataframe whose columns are updated
    columns (list of str): Names of the columns in which nulls are replaced
    default_value (varies): The replacement used for every null found in
        those columns.

    Returns:
    pandas.DataFrame: The same dataframe, with nulls in the listed columns
        replaced by the default value.
    """
    for name in columns:
        series = df[name]
        df[name] = series.fillna(default_value)
    return df

# Replace missing RATING, VOTES and Gross values with 0.
# NOTE(review): the same integer 0 is used for all three columns regardless
# of their dtype - confirm that is intended for any text-valued column.
col = ['RATING','VOTES','Gross']
df_movies = fill_nulls(df_movies,col,0)

In [None]:
def remove_newlines(df, columns):
    """
    Remove newline characters from the strings in the specified columns of a
    pandas dataframe.

    The dataframe is modified in place. Cells that are not strings (for
    example NaN for a missing value, or a numeric placeholder) are left
    untouched instead of raising.

    Parameters:
    df (pandas.DataFrame): The dataframe to modify
    columns (list of str): The names of the columns to remove newlines from

    Returns:
    pandas.DataFrame: The same dataframe with newlines removed from the
        string values of the specified columns.
    """
    def _strip_newlines(value):
        # Only strings have newlines to strip; pass everything else through
        # so missing values and non-text cells are preserved as they are.
        if isinstance(value, str):
            return value.replace('\n', '')
        return value

    for column in columns:
        df[column] = df[column].map(_strip_newlines)

    return df

# Strip embedded newline characters from the GENRE and ONE-LINE text columns.
cols = ['GENRE','ONE-LINE']
df_movies = remove_newlines(df_movies,cols)

In [None]:
def clean_year_range_data(df, column):
    """
    Reduce parenthesised year or year-range strings in a pandas dataframe
    column to a single four-digit year.

    The dataframe is modified in place. Values that are missing, or that
    contain no four-digit run before the first hyphen, become NaN.

    Parameters:
    df (pandas.DataFrame): The dataframe to modify
    column (str): The name of the (string-valued) column to modify

    Returns:
    pandas.DataFrame: The same dataframe with the specified column reduced
        to four-digit year strings.
    """
    # Remove parentheses. regex=True is spelled out because the default of
    # Series.str.replace changed to literal matching in pandas 2.0, which
    # would turn the character class into a no-op.
    years = df[column].str.replace(r'[()]', '', regex=True)

    # Keep only the text before the first hyphen (the start of a range).
    # The vectorised split passes missing values through instead of raising.
    years = years.str.split('-', n=1).str[0]

    # Keep only the first run of four digits.
    df[column] = years.str.extract(r'(\d{4})', expand=False)

    return df

# Reduce each YEAR value to a single four-digit year string.
df_movies = clean_year_range_data(df_movies,'YEAR')

In [None]:
# Display the cleaned dataframe.
df_movies

In [None]:
# Persist the cleaned data. The row index is omitted: after rows have been
# dropped it is only a gap-ridden leftover of the original row positions and
# would otherwise be written out as an extra unnamed column.
df_movies.to_csv('clean_movies.csv', index=False)