# In [None]:
import pandas as pd
import numpy as np

# In [None]:
# Load the data into a Pandas DataFrame.
# skiprows=1 discards the first physical line of the file, so the header row
# is taken from the second line. dtype=str keeps every column as text; numeric
# columns have to be converted explicitly before doing arithmetic on them.
df = pd.read_csv('data.csv', dtype=str, skiprows=1)

# Explore the data.
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell (and never in a plain script); it is not the last expression
# here, so print it explicitly instead of silently discarding the result.
print(df.head())  # Display the first 5 rows of the DataFrame
df.info()  # Prints column dtypes and non-null counts (writes to stdout itself)

# Handle missing values.
# The per-column missing-value counts are printed explicitly: as a bare
# expression in the middle of a cell/script the result would be thrown away.
print(df.isnull().sum())  # Count the number of missing values in each column
df.fillna(0, inplace=True)  # Replace missing values with 0 (mutates df in place)

# Remove the columns that are not needed (in place, selected by label).
df.drop(columns=['column1', 'column2'], inplace=True)

# Give a column a new label (in place); labels missing from the mapping are
# left untouched.
df.rename({'old_column_name': 'new_column_name'}, axis='columns', inplace=True)

# Cast one column to integers and store the converted values back on df.
column_as_int = df['column_name'].astype(int)
df['column_name'] = column_as_int

# Filter data by rows.
# Stored under its own name: previously both filters were assigned to
# `filtered_df`, so this row-filtered result was overwritten immediately.
row_filtered_df = df[df['column_name'] > 100]  # Keep rows where column_name > 100

# Filter data by columns.
# `filtered_df` keeps the value it ended up with before: the column subset.
filtered_df = df[['col1', 'col2', 'col3']]

# Group and aggregate data.
# numeric_only=True is required here: df is loaded with dtype=str, so most
# columns are text, and since pandas 2.0 mean() raises TypeError on
# non-numeric columns instead of silently dropping them.
# Stored under its own name so the next statement no longer overwrites it.
grouped_mean_df = df.groupby('column_name').mean(numeric_only=True)

# Per-group sum of column_name_2, broadcast back to every original row;
# reset_index() then moves the original row index into a regular column.
# NOTE(review): transform('sum') does not collapse to one row per group. If a
# true aggregation is intended, `.sum().reset_index()` is the usual form --
# confirm the intent before changing it.
grouped_df = df.groupby('column_name')['column_name_2'].transform('sum').reset_index()

# Combine two CSV-backed frames with a left join on their shared key.
df1 = pd.read_csv('data1.csv')
df2 = pd.read_csv('data2.csv')
merged_df = pd.merge(
    left=df1,
    right=df2,
    how='left',  # keep every row of df1, matched or not
    on='common_column',
    suffixes=('_df1', '_df2'),  # disambiguate overlapping non-key column names
)

# Pivot data: rows keyed by column1, one output column per distinct column2
# value, each cell holding the mean of column3.
# aggfunc is given by name: passing the NumPy callable (np.mean) is deprecated
# since pandas 2.1 and emits a FutureWarning; 'mean' selects the same
# aggregation on every pandas version.
# NOTE(review): 'column1' and 'column2' are dropped from df earlier in this
# script, so with the placeholder names as written this raises KeyError.
pivot_df = df.pivot_table(index='column1', columns='column2', values='column3', aggfunc='mean')  # Create a pivot table

# Wide -> long: keep column1 as the identifier and unpivot column2/column3
# into variable/value pairs.
# NOTE(review): 'column1' and 'column2' are dropped from df earlier in this
# script; confirm the intended column names.
melted_df = pd.melt(
    frame=df,
    id_vars=['column1'],
    value_vars=['column2', 'column3'],
)

# Order the rows by column_name, largest values first.
sorted_df = df.sort_values(by='column_name', ascending=False)

# Perform data calculations and transformations.
# NOTE(review): both statements write to 'new_column', so the doubled values
# assigned below replace the sum computed here; use distinct column names if
# both results are needed. 'column1'/'column2' are also dropped from df
# earlier in this script -- confirm the intended column names.
df['new_column'] = df['column1'] + df['column2']  # Element-wise combination of two columns

# Vectorized multiply instead of apply(lambda x: x * 2): same element-wise
# result, without a Python-level function call per row.
df['new_column'] = df['column1'] * 2

# Write the processed frame out as CSV; index=False leaves the row index out
# of the file.
df.to_csv(path_or_buf='processed_data.csv', index=False)