In [None]:
import sqlite3
import pandas as pd
# Part 1: Reading Files
# Each dataset is read from a file in the current working directory and
# previewed straight away, so a wrong path or format surfaces immediately.

# chinook.db (SQLite): pull the whole `customers` table into a DataFrame.
# try/finally guarantees the connection is released even when the query
# raises (for example when the table does not exist). Note that sqlite3's own
# `with conn:` form only manages the transaction; it does not close the
# connection, so it is not a substitute here.
conn = sqlite3.connect('chinook.db')
try:
    cust_df = pd.read_sql_query("SELECT * FROM customers", conn)
finally:
    conn.close()
print(cust_df.head(10))

# iris.json: report the frame's dimensions and its column labels.
iris_df = pd.read_json('iris.json')
print("Shape:", iris_df.shape)
print("Columns:", iris_df.columns.tolist())

# titanic.xlsx: preview the first rows (head() defaults to five).
titanic_df = pd.read_excel('titanic.xlsx')
print(titanic_df.head())

# flights.parquet: DataFrame.info() writes its report to stdout itself and
# returns None, so it is called bare -- wrapping it in print() would append a
# stray "None" line to the output.
flights_df = pd.read_parquet('flights.parquet')
flights_df.info()

# movie.csv: show ten randomly chosen rows. The seed is pinned so that
# Restart & Run All reproduces the same sample; any fixed integer works.
movie_df = pd.read_csv('movie.csv')
print(movie_df.sample(10, random_state=42))

In [None]:
# Part 2: Exploring DataFrames

# Iris -- normalise the column labels to lower case, then show the two
# sepal measurement columns.
iris_df.columns = iris_df.columns.str.lower()
print(iris_df.loc[:, ['sepal_length', 'sepal_width']])

# Titanic -- rows whose Age is strictly greater than 30.
titanic_over_30 = titanic_df.loc[titanic_df['Age'].gt(30)]
print(titanic_over_30)

# Titanic -- how many rows there are for each value of Sex.
gender_counts = titanic_df.loc[:, 'Sex'].value_counts()
print(gender_counts)

# Flights -- just the route and carrier columns.
print(flights_df.loc[:, ['origin', 'dest', 'carrier']])

# Flights -- number of distinct destination values.
unique_destinations = flights_df.loc[:, 'dest'].nunique()
print("Unique Destinations:", unique_destinations)

# Movies -- keep rows with duration above 120, then order them from the
# highest to the lowest director_facebook_likes.
long_movies = movie_df.loc[movie_df['duration'].gt(120)]
sorted_movies = long_movies.sort_values('director_facebook_likes', ascending=False)
print(sorted_movies)

In [None]:
# Part 3: Challenges and Explorations

# Iris -- summary statistics produced by describe() with its default
# column selection.
iris_stats = iris_df.describe()
print(iris_stats)

# Titanic -- minimum, maximum and sum of the Age column (pandas reductions
# skip missing values by default).
age_min = titanic_df['Age'].min()
age_max = titanic_df['Age'].max()
age_sum = titanic_df['Age'].sum()
print(f"Min Age: {age_min}, Max Age: {age_max}, Sum Age: {age_sum}")

# Movies -- add up director_facebook_likes over all rows sharing a
# director_name and report the name with the largest total.
# NOTE(review): if the likes figure is a per-director value repeated on each
# of that director's rows, this total grows with the number of films -- confirm
# that a sum (rather than max/first) is what is wanted.
most_liked_director = movie_df.groupby('director_name')['director_facebook_likes'].sum().idxmax()
print("Most Liked Director:", most_liked_director)

# Movies -- the five rows with the greatest duration, title and director only.
# NOTE(review): assumes movie.csv has a column literally named `title` -- confirm.
longest_movies = movie_df.nlargest(5, 'duration')[['title', 'director_name']]
print(longest_movies)

# Flights -- count of missing values per column.
missing_values = flights_df.isnull().sum()
print("Missing Values:", missing_values)

# Flights -- replace missing numeric values with their column mean.
# numeric_only=True limits the means to numeric columns. The frame also has
# origin/dest/carrier columns (presumably text -- confirm), and pandas 2.x
# raises TypeError when asked for the mean of a non-numeric column instead of
# silently dropping it. Columns without a computed mean are left untouched.
flights_df.fillna(flights_df.mean(numeric_only=True), inplace=True)