In [3]:
import numpy as np
import pandas as pd



In [4]:
df = pd.read_csv('movies_metadata.csv', low_memory=False)

In [9]:
print(f"Shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

Shape: (45466, 24)
Columns: ['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id', 'imdb_id', 'original_language', 'original_title', 'overview', 'popularity', 'poster_path', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title', 'video', 'vote_average', 'vote_count']


In [10]:
print("=== DATASET INFO ===")
print(f"Total movies: {len(df)}")
print(f"Total columns: {len(df.columns)}")
print("\nFirst 5 rows:")
display(df.head())

print("\nData types:")
print(df.dtypes.value_counts())

=== DATASET INFO ===
Total movies: 45466
Total columns: 24

First 5 rows:


Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0



Data types:
object     20
float64     4
Name: count, dtype: int64


In [11]:
ratings = df['vote_average'].dropna().astype(float).values

print("=== RATING STATISTICS USING NUMPY ===")
print(f"Number of valid ratings: {len(ratings)}")

=== RATING STATISTICS USING NUMPY ===
Number of valid ratings: 45460


In [None]:
stats = {
    "Mean": np.mean(ratings),
    "Median": np.median(ratings),
    "Standard Deviation": np.std(ratings),
    "Variance": np.var(ratings),
    "Minimum": np.min(ratings),
    "Maximum": np.max(ratings),
    "25th Percentile": np.percentile(ratings, 25),
    "50th Percentile (Median)": np.percentile(ratings, 50),
    "75th Percentile": np.percentile(ratings, 75),
    "Range": np.ptp(ratings)  
}

for stat_name, stat_value in stats.items():
    print(f"{stat_name}: {stat_value:.4f}")

Mean: 5.6182
Median: 6.0000
Standard Deviation: 1.9242
Variance: 3.7025
Minimum: 0.0000
Maximum: 10.0000
25th Percentile: 5.0000
50th Percentile (Median): 6.0000
75th Percentile: 6.8000
Range: 10.0000


In [None]:
RATING_THRESHOLD = 7.5

high_rating_mask = ratings > RATING_THRESHOLD
high_rated_indices = np.where(high_rating_mask)[0]

print(f"=== MOVIES WITH RATING > {RATING_THRESHOLD} ===")
print(f"Number of movies: {len(high_rated_indices)}")
print(f"Percentage: {(len(high_rated_indices)/len(ratings)*100):.2f}%")

high_rated_movies = []
for idx in high_rated_indices[:10]:  
    movie_idx = df[df['vote_average'] == ratings[idx]].index[0]
    title = df.loc[movie_idx, 'title']
    rating = ratings[idx]
    high_rated_movies.append((title, rating))

print("\nTop 10 high-rated movies:")
for i, (title, rating) in enumerate(high_rated_movies, 1):
    print(f"{i}. {title}: {rating:.1f}")

=== MOVIES WITH RATING > 7.5 ===
Number of movies: 3486
Percentage: 7.67%

Top 10 high-rated movies:
1. Toy Story: 7.7
2. Toy Story: 7.7
3. Casino: 7.8
4. The City of Lost Children: 7.6
5. Se7en: 8.1
6. Se7en: 8.1
7. Toy Story: 7.7
8. The City of Lost Children: 7.6
9. La Haine: 7.9
10. Toy Story: 7.7


In [None]:

from datetime import datetime
def extract_year(date_string):
   
    if pd.isna(date_string):
        return None
    
    date_string = str(date_string)
    
    date_formats = ['%Y-%m-%d', '%m/%d/%Y', '%d-%m-%Y', '%Y']
    
    for fmt in date_formats:
        try:
            date_obj = datetime.strptime(date_string, fmt)
            return date_obj.year
        except ValueError:
            continue
    
    import re
    year_match = re.search(r'\b(\d{4})\b', date_string)
    if year_match:
        return int(year_match.group(1))
    
    return None

df['release_year'] = df['release_date'].apply(extract_year)

print(f"Years extracted successfully!")
print(f"Unique years: {df['release_year'].nunique()}")
print(f"Year range: {df['release_year'].min()} - {df['release_year'].max()}")

Years extracted successfully!
Unique years: 135
Year range: 1874.0 - 2020.0


In [None]:
TARGET_YEAR = 2015

year_mask = df['release_year'] == TARGET_YEAR

movies_2015 = df[year_mask]

print(f"=== MOVIES FROM YEAR {TARGET_YEAR} ===")
print(f"Number of movies: {len(movies_2015)}")
print(f"Average rating: {movies_2015['vote_average'].mean():.2f}")

top_2015_movies = movies_2015.sort_values('vote_average', ascending=False)[['title', 'vote_average', 'vote_count']].head(10)
print("\nTop 10 movies from 2015:")
display(top_2015_movies)

=== MOVIES FROM YEAR 2015 ===
Number of movies: 1905
Average rating: 5.76

Top 10 movies from 2015:


Unnamed: 0,title,vote_average,vote_count
32440,Butterfly,10.0,3.0
42733,Nick Swardson: Taste It,10.0,2.0
41695,Крепость: щитом и мечом,10.0,1.0
32797,"OMG, I'm a Robot!",10.0,1.0
40777,"My Night, Your Day",10.0,1.0
36310,Sum of Histories,10.0,1.0
42477,American Sharia,10.0,1.0
39078,Old Fashioned: The Story of the Wisconsin Supp...,10.0,1.0
44550,Harold and Lillian: A Hollywood Love Story,10.0,1.0
41710,Nobody Walks in L.A.,9.5,2.0


In [None]:
print("=== PANDAS DATA HANDLING ===")

print("\n1. Data Types:")
print(df.dtypes)

print("\n2. Missing Values Analysis:")
missing_data = df.isnull().sum()
missing_percentage = (missing_data / len(df)) * 100

missing_df = pd.DataFrame({
    'Missing_Count': missing_data,
    'Missing_Percentage': missing_percentage
})

print("Top 10 columns with missing values:")
display(missing_df.sort_values('Missing_Percentage', ascending=False).head(10))

=== PANDAS DATA HANDLING ===

1. Data Types:
adult                     object
belongs_to_collection     object
budget                    object
genres                    object
homepage                  object
id                        object
imdb_id                   object
original_language         object
original_title            object
overview                  object
popularity                object
poster_path               object
production_companies      object
production_countries      object
release_date              object
revenue                  float64
runtime                  float64
spoken_languages          object
status                    object
tagline                   object
title                     object
video                     object
vote_average             float64
vote_count               float64
release_year             float64
dtype: object

2. Missing Values Analysis:
Top 10 columns with missing values:


Unnamed: 0,Missing_Count,Missing_Percentage
belongs_to_collection,40972,90.115691
homepage,37684,82.883913
tagline,25054,55.104914
overview,954,2.098271
poster_path,386,0.848986
runtime,263,0.578454
release_year,90,0.19795
status,87,0.191352
release_date,87,0.191352
imdb_id,17,0.037391


In [None]:
print("=== HANDLING MISSING DATA ===")

original_shape = df.shape

print("\n1. Before handling missing values:")
print(f"Total missing values: {df.isnull().sum().sum()}")

numeric_cols = ['runtime', 'vote_average', 'vote_count', 'popularity']
for col in numeric_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')
        median_val = df[col].median()
        df[col].fillna(median_val, inplace=True)
        print(f"Filled {col} with median: {median_val:.2f}")

categorical_cols = ['original_language', 'status', 'homepage']
for col in categorical_cols:
    if col in df.columns:
        df[col].fillna('Unknown', inplace=True)
        print(f"Filled {col} with 'Unknown'")

df['title'].fillna(df['original_title'], inplace=True)
df['title'].fillna('Unknown Title', inplace=True)

print("\n2. After handling missing values:")
print(f"Total missing values: {df.isnull().sum().sum()}")
print(f"DataFrame shape: {df.shape} (Original: {original_shape})")

=== HANDLING MISSING DATA ===

1. Before handling missing values:
Total missing values: 105652
Filled runtime with median: 95.00
Filled vote_average with median: 6.00
Filled vote_count with median: 10.00
Filled popularity with median: 1.13
Filled original_language with 'Unknown'
Filled status with 'Unknown'
Filled homepage with 'Unknown'

2. After handling missing values:
Total missing values: 67584
DataFrame shape: (45466, 25) (Original: (45466, 25))


In [None]:
print("=== SORTING AND FILTERING MOVIES ===")

print("\n1. Top 20 Highest Rated Movies:")
top_movies = df.sort_values('vote_average', ascending=False)[['title', 'vote_average', 'vote_count', 'release_year']].head(20)
display(top_movies)

MIN_VOTES = 1000
print(f"\n2. Top 20 Highest Rated Movies (with at least {MIN_VOTES} votes):")
filtered_top_movies = df[df['vote_count'] >= MIN_VOTES]
filtered_top_movies = filtered_top_movies.sort_values('vote_average', ascending=False)[['title', 'vote_average', 'vote_count', 'release_year']].head(20)
display(filtered_top_movies)

MIN_RUNTIME = 60  
MAX_RUNTIME = 180  
print(f"\n3. Movies with runtime between {MIN_RUNTIME} and {MAX_RUNTIME} minutes:")
runtime_filtered = df[(df['runtime'] >= MIN_RUNTIME) & (df['runtime'] <= MAX_RUNTIME)]
print(f"Number of movies: {len(runtime_filtered)}")
print(f"Average rating: {runtime_filtered['vote_average'].mean():.2f}")

=== SORTING AND FILTERING MOVIES ===

1. Top 20 Highest Rated Movies:


Unnamed: 0,title,vote_average,vote_count,release_year
2948,Ten Benny,10.0,1.0,1995.0
32135,The Paw Project,10.0,1.0,2013.0
25544,Lunch Break,10.0,1.0,2008.0
36078,Looking for Grace,10.0,1.0,2016.0
25654,Fearless Fagan,10.0,1.0,1952.0
25689,Back to School with Franklin,10.0,1.0,2003.0
25748,Symphony of the Soil,10.0,1.0,2013.0
15397,Little Dorrit,10.0,1.0,1987.0
186,Reckless,10.0,1.0,1995.0
15383,Children in the Wind,10.0,1.0,1937.0



2. Top 20 Highest Rated Movies (with at least 1000 votes):


Unnamed: 0,title,vote_average,vote_count,release_year
834,The Godfather,8.5,6024.0,1972.0
40251,Your Name.,8.5,1030.0,2016.0
314,The Shawshank Redemption,8.5,8358.0,1994.0
292,Pulp Fiction,8.3,8670.0,1994.0
1184,Once Upon a Time in America,8.3,1104.0,1984.0
2211,Life Is Beautiful,8.3,3643.0,1997.0
5481,Spirited Away,8.3,3968.0,2001.0
1152,One Flew Over the Cuckoo's Nest,8.3,3001.0,1975.0
2843,Fight Club,8.3,9678.0,1999.0
12481,The Dark Knight,8.3,12269.0,2008.0



3. Movies with runtime between 60 and 180 minutes:
Number of movies: 41017
Average rating: 5.65


In [None]:

print("=== PARSING GENRES COLUMN ===")
print("\n=== DATASET INFO ===")

print(f"Total movies: {len(df)}")
print(f"Columns with genres data type: {df['genres'].dtype}")
import ast
print("\n=== SAMPLE GENRES DATA ===")
for i in range(5):
    print(f"Row {i}: {df['genres'].iloc[i]}")
def parse_genres_fast(genres_entry):
    
    if pd.isna(genres_entry):
        return []
    
    if isinstance(genres_entry, list):
        return [genre.get('name', '') for genre in genres_entry 
                if isinstance(genre, dict) and 'name' in genre]
    
    if isinstance(genres_entry, str):
        if genres_entry.strip().startswith('[') and genres_entry.strip().endswith(']'):
            try:
                parsed = ast.literal_eval(genres_entry)
                if isinstance(parsed, list):
                    return [genre.get('name', '') for genre in parsed 
                           if isinstance(genre, dict) and 'name' in genre]
            except (ValueError, SyntaxError):
                return []
    
    return []

print("Parsing genres...")
df['genres_list'] = df['genres'].apply(parse_genres_fast)

df['num_genres'] = df['genres_list'].apply(len)

print(f"\n=== GENRES PARSING RESULTS ===")
print(f"Movies with parsed genres: {df['genres_list'].notna().sum()}")
print(f"Movies with 0 genres: {(df['num_genres'] == 0).sum()}")
print(f"Movies with 1 genre: {(df['num_genres'] == 1).sum()}")
print(f"Movies with 2+ genres: {(df['num_genres'] >= 2).sum()}")
print(f"\nSample parsed genres:")
for i in range(3):
    print(f"Row {i}: {df['genres_list'].iloc[i]}")

=== PARSING GENRES COLUMN ===

=== DATASET INFO ===
Total movies: 45466
Columns with genres data type: object

=== SAMPLE GENRES DATA ===
Row 0: [{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]
Row 1: [{'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 10751, 'name': 'Family'}]
Row 2: [{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]
Row 3: [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]
Row 4: [{'id': 35, 'name': 'Comedy'}]
Parsing genres...

=== GENRES PARSING RESULTS ===
Movies with parsed genres: 45466
Movies with 0 genres: 2442
Movies with 1 genre: 14559
Movies with 2+ genres: 28465

Sample parsed genres:
Row 0: ['Animation', 'Comedy', 'Family']
Row 1: ['Adventure', 'Fantasy', 'Family']
Row 2: ['Romance', 'Comedy']


In [None]:
print("\n=== EXTRACTING ALL UNIQUE GENRES ===")
from collections import Counter
all_genres = []
genre_counter = Counter()

for genres in df['genres_list']:
    all_genres.extend(genres)
    genre_counter.update(genres)

unique_genres = list(set(all_genres))

print(f"Total genre occurrences: {len(all_genres)}")
print(f"Unique genres found: {len(unique_genres)}")
print(f"\nTop 20 most common genres:")
for genre, count in genre_counter.most_common(20):
    print(f"  {genre}: {count:,} movies ({count/len(df)*100:.1f}%)")


=== EXTRACTING ALL UNIQUE GENRES ===
Total genre occurrences: 91106
Unique genres found: 32

Top 20 most common genres:
  Drama: 20,265 movies (44.6%)
  Comedy: 13,182 movies (29.0%)
  Thriller: 7,624 movies (16.8%)
  Romance: 6,735 movies (14.8%)
  Action: 6,596 movies (14.5%)
  Horror: 4,673 movies (10.3%)
  Crime: 4,307 movies (9.5%)
  Documentary: 3,932 movies (8.6%)
  Adventure: 3,496 movies (7.7%)
  Science Fiction: 3,049 movies (6.7%)
  Family: 2,770 movies (6.1%)
  Mystery: 2,467 movies (5.4%)
  Fantasy: 2,313 movies (5.1%)
  Animation: 1,935 movies (4.3%)
  Foreign: 1,622 movies (3.6%)
  Music: 1,598 movies (3.5%)
  History: 1,398 movies (3.1%)
  War: 1,323 movies (2.9%)
  Western: 1,042 movies (2.3%)
  TV Movie: 767 movies (1.7%)


In [None]:
print("\n=== FILTER MOVIES BY GENRE ===")
df_clean = df.copy()
genre_to_movies = {}
for idx, genres in enumerate(df_clean['genres_list']):
    for genre in genres:
        if genre not in genre_to_movies:
            genre_to_movies[genre] = []
        genre_to_movies[genre].append(idx)

print(f"Created lookup for {len(genre_to_movies)} genres")

def get_movies_by_genre_fast(genre_name):
    """Fast genre filtering using pre-built dictionary"""
    if genre_name not in genre_to_movies:
        return pd.DataFrame()
    indices = genre_to_movies[genre_name]
    return df_clean.iloc[indices]

TOP_N_GENRES = 10
top_genres = sorted(genre_to_movies.items(), key=lambda x: len(x[1]), reverse=True)[:TOP_N_GENRES]

print(f"\nTop {TOP_N_GENRES} Genres Analysis:")
print("-" * 50)
for genre, indices in top_genres:
    genre_movies = df_clean.iloc[indices]
    avg_rating = genre_movies['vote_average'].mean()
    avg_votes = genre_movies['vote_count'].mean()
    print(f"\n{genre}:")
    print(f"  Movies: {len(indices):,}")
    print(f"  Avg Rating: {avg_rating:.2f}")
    print(f"  Avg Votes: {avg_votes:.0f}")
    
    top_movies = genre_movies.nlargest(3, 'vote_average')[['title', 'vote_average']]
    for _, movie in top_movies.iterrows():
        print(f"  - {movie['title'][:40]}...: {movie['vote_average']:.1f}")


=== FILTER MOVIES BY GENRE ===
Created lookup for 32 genres

Top 10 Genres Analysis:
--------------------------------------------------

Drama:
  Movies: 20,265
  Avg Rating: 5.91
  Avg Votes: 97
  - Reckless...: 10.0
  - Girl in the Cadillac...: 10.0
  - Other Voices Other Rooms...: 10.0

Comedy:
  Movies: 13,182
  Avg Rating: 5.72
  Avg Votes: 109
  - Reckless...: 10.0
  - Dancer, Texas Pop. 81...: 10.0
  - Stiff Upper Lips...: 10.0

Thriller:
  Movies: 7,624
  Avg Rating: 5.62
  Avg Votes: 188
  - Goodbye & Amen...: 10.0
  - Summer City...: 10.0
  - American Hostage...: 10.0

Romance:
  Movies: 6,735
  Avg Rating: 5.80
  Avg Votes: 96
  - Girl in the Cadillac...: 10.0
  - Chilly Scenes of Winter...: 10.0
  - The Misadventures of Margaret...: 10.0

Action:
  Movies: 6,596
  Avg Rating: 5.58
  Avg Votes: 259
  - Ten Benny...: 10.0
  - High Roller: The Stu Ungar Story...: 10.0
  - Road to Redemption...: 10.0

Horror:
  Movies: 4,673
  Avg Rating: 5.23
  Avg Votes: 101
  - The Bride fr

In [None]:
print("\n=== SORTING AND FILTERING ===")


MIN_VOTES_FOR_RANKING = 100

print(f"1. Top Movies (with at least {MIN_VOTES_FOR_RANKING} votes):")
top_movies = df_clean[df_clean['vote_count'] >= MIN_VOTES_FOR_RANKING]
top_movies_sorted = top_movies.nlargest(10, 'vote_average')[['title', 'vote_average', 'vote_count']]
display(top_movies_sorted)


print(f"\n2. High Quality Movies (Rating > 7.5, Votes > 1000):")
high_quality = df_clean[
    (df_clean['vote_average'] > 7.5) & 
    (df_clean['vote_count'] > 1000)
]
print(f"  Found: {len(high_quality):,} movies")
if len(high_quality) > 0:
    display(high_quality[['title', 'vote_average', 'vote_count', ]].head())


print(f"\n3. Action & Adventure Movies:")
action_adventure = df_clean[
    df_clean['genres_list'].apply(lambda x: 'Action' in x and 'Adventure' in x)
]
print(f"  Found: {len(action_adventure):,} movies")
if len(action_adventure) > 0:
    display(action_adventure[['title', 'vote_average', 'genres_list']].head())



=== SORTING AND FILTERING ===
1. Top Movies (with at least 100 votes):


Unnamed: 0,title,vote_average,vote_count
10309,Dilwale Dulhania Le Jayenge,9.1,661.0
39085,Planet Earth,8.8,176.0
314,The Shawshank Redemption,8.5,8358.0
834,The Godfather,8.5,6024.0
40251,Your Name.,8.5,1030.0
13252,Dear Zachary: A Letter to a Son About His Father,8.4,146.0
292,Pulp Fiction,8.3,8670.0
522,Schindler's List,8.3,4436.0
1152,One Flew Over the Cuckoo's Nest,8.3,3001.0
1176,Psycho,8.3,2405.0



2. High Quality Movies (Rating > 7.5, Votes > 1000):
  Found: 182 movies


Unnamed: 0,title,vote_average,vote_count
0,Toy Story,7.7,5415.0
5,Heat,7.7,1886.0
15,Casino,7.8,1343.0
46,Se7en,8.1,5915.0
49,The Usual Suspects,8.1,3334.0



3. Action & Adventure Movies:
  Found: 1,733 movies


Unnamed: 0,title,vote_average,genres_list
7,Tom and Huck,5.4,"[Action, Adventure, Drama, Family]"
8,Sudden Death,5.5,"[Action, Adventure, Thriller]"
9,GoldenEye,6.6,"[Adventure, Action, Thriller]"
14,Cutthroat Island,5.7,"[Action, Adventure]"
22,Assassins,6.0,"[Action, Adventure, Crime, Thriller]"


In [None]:
print("\n=== GROUPING AND AGGREGATION ===")


print("\n1. Average Rating by Genre (All genres):")

genre_stats = []
for genre, indices in genre_to_movies.items():
    if len(indices) >= 10:  
        genre_movies = df_clean.iloc[indices]
        avg_rating = genre_movies['vote_average'].mean()
        median_rating = genre_movies['vote_average'].median()
        genre_stats.append({
            'Genre': genre,
            'Movie Count': len(indices),
            'Avg Rating': avg_rating,
            'Median Rating': median_rating,
            'Total Votes': genre_movies['vote_count'].sum()
        })

genre_stats_df = pd.DataFrame(genre_stats)
genre_stats_df = genre_stats_df.sort_values('Avg Rating', ascending=False)

print(f"Analyzed {len(genre_stats_df)} genres with at least 10 movies")
display(genre_stats_df.head(15))


=== GROUPING AND AGGREGATION ===

1. Average Rating by Genre (All genres):
Analyzed 20 genres with at least 10 movies


Unnamed: 0,Genre,Movie Count,Avg Rating,Median Rating,Total Votes
0,Animation,1935,6.275181,6.5,453058.0
11,History,1398,6.15422,6.5,150095.0
14,War,1323,6.041119,6.4,159258.0
6,Drama,20265,5.905221,6.2,1956070.0
16,Music,1598,5.879599,6.3,108333.0
8,Crime,4307,5.878407,6.2,758130.0
17,Documentary,3932,5.823093,6.6,50456.0
13,Mystery,2467,5.82047,6.0,409054.0
5,Romance,6735,5.802658,6.1,645639.0
4,Fantasy,2313,5.787722,6.0,769569.0
