In [2]:
import pandas as pd

# Location of the movies metadata CSV analysed by this script.
DATA_PATH = r"C:\Users\waghb\OneDrive\Desktop\dsbdal\DSBDALExam DataSets\Movie\movies_metadata.csv"

# Measurement columns that are sorted, melted and averaged below; all of them
# must be numeric for those steps to behave.
MEASURE_COLUMNS = ['vote_average', 'vote_count', 'popularity', 'runtime', 'revenue']


def coerce_numeric(frame: pd.DataFrame, columns) -> pd.DataFrame:
    """Return a copy of ``frame`` with ``columns`` converted to numeric dtype.

    Values that cannot be parsed as numbers become NaN instead of raising.
    The input frame is not modified.

    Raises:
        KeyError: if a name in ``columns`` is not a column of ``frame``.
    """
    converted = frame.copy()
    for column in columns:
        converted[column] = pd.to_numeric(converted[column], errors='coerce')
    return converted


def union_rows(*subsets: pd.DataFrame) -> pd.DataFrame:
    """Stack row selections of one DataFrame, keeping each index label once.

    Rows are de-duplicated by index label (first occurrence wins), not by
    content, so two different source rows that happen to hold identical
    values are both kept. Assumes the subsets were taken from a frame whose
    index labels are unique (e.g. the default index produced by read_csv).
    """
    stacked = pd.concat(subsets, axis=0)
    return stacked[~stacked.index.duplicated(keep='first')]


# The pipeline only runs when executed as a script or notebook cell (where
# __name__ is "__main__"); the names assigned below are still module-level
# globals in that case. Importing the helpers alone does not touch the disk.
if __name__ == "__main__":
    # Read the whole file in one pass (low_memory=False) so dtype inference is
    # consistent. NOTE: on_bad_lines='skip' silently drops malformed rows.
    df = pd.read_csv(DATA_PATH, on_bad_lines='skip', quotechar='"',
                     encoding='utf-8', low_memory=False)

    # Coerce every measurement column (not only 'popularity') so a stray
    # non-numeric value cannot break the sort, melt or mean steps.
    df = coerce_numeric(df, MEASURE_COLUMNS)

    # m. Create data subsets for different languages (original_language)
    en_subset = df[df['original_language'] == 'en']

    # n. Merge two subsets (English titles with titles whose popularity > 10)
    high_popularity = df[df['popularity'] > 10]
    merged_subsets = union_rows(en_subset, high_popularity)

    # o. Sort Data using customer ratings (vote_average), highest first
    sorted_df = df.sort_values(by='vote_average', ascending=False)

    # p. Transposing Data
    transposed_df = df.transpose()

    # q. Melting Data to long format: one row per (language, measurement)
    melted_df = pd.melt(df, id_vars=['original_language'],
                        value_vars=MEASURE_COLUMNS,
                        var_name='measurement', value_name='value')

    # r. Casting data to wide format: mean of each measurement per language
    wide_df = melted_df.pivot_table(index='original_language', columns='measurement',
                                    values='value', aggfunc='mean')
    wide_df = wide_df.reset_index()

    # Print results to verify
    print("English subset:\n", en_subset.head())
    print("Merged Subsets:\n", merged_subsets.head())
    print("Sorted by Vote Average:\n", sorted_df.head())
    print("Transposed DataFrame:\n", transposed_df)
    print("Melted DataFrame:\n", melted_df.head())
    print("Wide Format DataFrame:\n", wide_df)

English subset:
    adult                              belongs_to_collection    budget  \
0  False  {'id': 10194, 'name': 'Toy Story Collection', ...  30000000   
1  False                                                NaN  65000000   
2  False  {'id': 119050, 'name': 'Grumpy Old Men Collect...         0   
3  False                                                NaN  16000000   
4  False  {'id': 96871, 'name': 'Father of the Bride Col...         0   

                                              genres  \
0  [{'id': 16, 'name': 'Animation'}, {'id': 35, '...   
1  [{'id': 12, 'name': 'Adventure'}, {'id': 14, '...   
2  [{'id': 10749, 'name': 'Romance'}, {'id': 35, ...   
3  [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...   
4                     [{'id': 35, 'name': 'Comedy'}]   

                               homepage     id    imdb_id original_language  \
0  http://toystory.disney.com/toy-story    862  tt0114709                en   
1                                   NaN   8844  t