In [21]:
import pandas as pd

def process_data():
    """Load ``biopics.csv`` and return a cleaned frame sorted by earnings.

    The pipeline, in order:

    1. Read ``biopics.csv`` (latin-1 encoded) from the working directory.
    2. Drop fully duplicated rows.
    3. Rename the ``box_office`` column to ``earnings``.
    4. Drop rows whose ``earnings`` is NaN.
    5. Keep only rows with ``year_release`` >= 1990.
    6. Cast ``type_of_subject`` and ``country`` to the ``category`` dtype.
    7. Add the boolean column ``lead_actor_actress_known`` (True where
       ``lead_actor_actress`` is not missing).
    8. Divide ``earnings`` by 1,000,000 (millions of dollars).
    9. Keep only the seven output columns, in a fixed order, and sort by
       ``earnings`` in descending order.

    The sorted frame is printed (still carrying its original row labels)
    before the index is reset.

    Returns:
        pandas.DataFrame: the processed rows with a fresh 0..n-1 index.
    """
    # Do not alter this line.
    biopics = pd.read_csv("biopics.csv", encoding='latin-1')

    column_order = [
        "title",
        "year_release",
        "earnings",
        "country",
        "type_of_subject",
        "lead_actor_actress",
        "lead_actor_actress_known",
    ]

    # Each step returns a new frame, so nothing is mutated in place.
    cleaned = (
        biopics.drop_duplicates()
        .rename(columns={"box_office": "earnings"})
        .dropna(subset=["earnings"])
    )

    # Explicit copy: the boolean filter yields a derived frame, and assigning
    # columns to it directly can raise SettingWithCopyWarning on pandas
    # versions that do not use copy-on-write.
    cleaned = cleaned[cleaned["year_release"] >= 1990].copy()

    cleaned["type_of_subject"] = cleaned["type_of_subject"].astype("category")
    cleaned["country"] = cleaned["country"].astype("category")
    cleaned["lead_actor_actress_known"] = cleaned["lead_actor_actress"].notna()

    # Update earnings to be in millions of dollars.
    cleaned["earnings"] = cleaned["earnings"] / 1_000_000

    df = cleaned[column_order].sort_values(by="earnings", ascending=False)

    # Printed before reset_index so the output shows the source row labels.
    print(df)
    return df.reset_index(drop=True)

# Example usage: run the pipeline once. process_data() prints the sorted frame
# as a side effect; the re-indexed result it returns is kept in result_df.
result_df = process_data()



                     title  year_release   earnings    country  \
38         American Sniper          2014  350.00000         US   
547         The Blind Side          2009  256.00000         US   
334                Lincoln          2012  182.00000         US   
11        A Beautiful Mind          2001  171.00000         US   
124    Catch Me If You Can          2002  164.00000  US/Canada   
..                     ...           ...        ...        ...   
409                   Nora          2000    0.01230         UK   
624       The Look of Love          2013    0.01040      US/UK   
183                Ed Gein          2000    0.00571      US/UK   
314           Kid Cannabis          2014    0.00557         US   
485  Set Fire to the Stars          2014    0.00327         UK   

    type_of_subject     lead_actor_actress  lead_actor_actress_known  
38         Military         Bradley Cooper                      True  
547         Athlete          Quinton Aaron                      T