In [1]:
import pandas as pd


def load_data(filepath):
    """Read the CSV at ``filepath`` and return it as a DataFrame.

    ``filepath`` is handed to ``pd.read_csv`` unchanged, with all of that
    function's default parsing options.
    """
    return pd.read_csv(filepath)


def clean_released_year(df):
    """Return a copy of ``df`` whose ``Released_Year`` column is integer.

    Values that cannot be parsed as numbers are coerced to NaN and the
    corresponding rows are dropped. The input frame is not modified and the
    surviving rows keep their original index labels.
    """
    # Unparseable entries become NaN so they can be filtered out below.
    parsed_years = pd.to_numeric(df['Released_Year'], errors='coerce')
    valid_rows = (
        df.assign(Released_Year=parsed_years)
        .dropna(subset=['Released_Year'])
    )
    return valid_rows.astype({'Released_Year': int})


def clean_runtime(df):
    """Return a copy of ``df`` whose ``Runtime`` column is float.

    Text values such as ``'142 min'`` have the literal ``' min'`` suffix
    removed before being converted to float. A ``Runtime`` column that is
    already numeric is only cast to float, so applying this function to an
    already-cleaned frame is safe. The input frame is not modified.

    Raises:
        ValueError: if a text value is still not a valid number after the
            suffix has been removed.
    """
    df = df.copy()
    runtime = df['Runtime']
    # Numeric columns have no ``.str`` accessor; stripping the suffix is only
    # meaningful (and only possible) for text data.
    if not pd.api.types.is_numeric_dtype(runtime):
        runtime = runtime.str.replace(' min', '', regex=False)
    df['Runtime'] = runtime.astype(float)
    return df


def create_decade_column(df):
    """Return a copy of ``df`` with a ``Decade`` column added.

    ``Decade`` is ``Released_Year`` floored to a multiple of ten
    (for example 1994 becomes 1990). The input frame is not modified.
    """
    decade = df['Released_Year'].floordiv(10).mul(10)
    return df.assign(Decade=decade)


def calculate_metrics_by_decade(df):
    """Aggregate ``df`` into one row per ``Decade``.

    The returned frame has the columns ``Decade``, ``total_movies`` (count
    of non-null ``Series_Title`` values), ``avg_imdb_rating`` (mean of
    ``IMDB_Rating``) and ``avg_runtime`` (mean of ``Runtime``). Both
    averages are rounded to two decimal places.
    """
    # Output column name -> (source column, aggregation function).
    aggregations = {
        'total_movies': ('Series_Title', 'count'),
        'avg_imdb_rating': ('IMDB_Rating', 'mean'),
        'avg_runtime': ('Runtime', 'mean'),
    }
    per_decade = df.groupby('Decade').agg(**aggregations).reset_index()

    # Round only the two averages; the other columns are left untouched.
    return per_decade.round({'avg_imdb_rating': 2, 'avg_runtime': 2})


def save_results(metrics, output_file):
    """Write ``metrics`` to ``output_file`` as CSV and report the destination.

    The index is not written. A confirmation line naming ``output_file`` is
    printed after the write completes.
    """
    metrics.to_csv(path_or_buf=output_file, index=False)
    print(f"Resultados salvos em: {output_file}")


def display_summary(df, metrics):
    """Print a report to standard output.

    The report has a title banner, the number of rows in ``df``, and the
    ``metrics`` table rendered without its index.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    # Title banner.
    print("\n" + heavy_rule)
    print("ANALISE DE FILMES DO IMDB POR DECADA")
    print(heavy_rule)

    # Row count of the analysed frame.
    print(f"\nTotal de filmes analisados: {len(df)}")

    # Per-decade table.
    print("\n" + light_rule)
    print("METRICAS POR DECADA")
    print(light_rule)
    print(metrics.to_string(index=False))
    print(heavy_rule + "\n")


def main():
    """Run the whole analysis: load, clean, aggregate, save and print.

    The IMDB CSV is read from a fixed Kaggle input path, the year and
    runtime columns are cleaned, a decade column is derived, and the
    per-decade metrics are written to a CSV file and printed.
    """
    source_path = '/kaggle/input/imdb-dataset-of-top-1000-movies-and-tv-shows/imdb_top_1000.csv'
    target_path = 'imdb_metrics_by_decade.csv'

    # Each step takes a DataFrame and returns a new one, so they chain.
    movies = (
        load_data(source_path)
        .pipe(clean_released_year)
        .pipe(clean_runtime)
        .pipe(create_decade_column)
    )

    decade_metrics = calculate_metrics_by_decade(movies)

    save_results(decade_metrics, target_path)
    display_summary(movies, decade_metrics)


# Run the pipeline only when this module is executed as the main program,
# not when it is imported.
if __name__ == '__main__':
    main()

Resultados salvos em: imdb_metrics_by_decade.csv

ANALISE DE FILMES DO IMDB POR DECADA

Total de filmes analisados: 999

------------------------------------------------------------
METRICAS POR DECADA
------------------------------------------------------------
 Decade  total_movies  avg_imdb_rating  avg_runtime
   1920            11             8.13        86.27
   1930            24             7.97       102.12
   1940            35             8.03       109.80
   1950            56             8.06       118.68
   1960            73             7.97       126.45
   1970            76             7.97       122.74
   1980            89             7.95       121.22
   1990           150             7.96       123.61
   2000           237             7.90       123.61
   2010           242             7.92       127.76
   2020             6             8.13       126.67

