# Importing Pandas Library
Import the Pandas library and check its version.

In [1]:
# Import the pandas library
import pandas as pd

# Check the version of pandas
print(pd.__version__)

1.5.3


# Series in Pandas
Create, manipulate, and access elements in a Pandas Series.

In [2]:
# Create a pandas Series
series = pd.Series([1, 2, 3, 4, 5])

# Display the Series
print(series)

# Access elements in the Series
print(series[0])  # Access the first element
print(series[2])  # Access the third element

# Manipulate the Series
series = series * 2  # Multiply all elements by 2
print(series)

# Add a new element to the Series
series = series.append(pd.Series([6]))
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64
1
3
0     2
1     4
2     6
3     8
4    10
dtype: int64
0     2
1     4
2     6
3     8
4    10
0     6
dtype: int64


  series = series.append(pd.Series([6]))


# DataFrames in Pandas
Create, manipulate, and access elements in a Pandas DataFrame.

In [3]:
# Create a pandas DataFrame
df = pd.DataFrame({
    'A': [1, 2, 3, 4, 5],
    'B': ['a', 'b', 'c', 'd', 'e'],
    'C': [1.1, 2.2, 3.3, 4.4, 5.5]
})

# Display the DataFrame
print(df)

# Access elements in the DataFrame
print(df['A'])  # Access column 'A'
print(df.loc[0])  # Access the first row
print(df.loc[0, 'B'])  # Access the first row of column 'B'

# Manipulate the DataFrame
df['A'] = df['A'] * 2  # Multiply all elements in column 'A' by 2
print(df)

# Add a new column to the DataFrame
df['D'] = pd.Series(['f', 'g', 'h', 'i', 'j'])
print(df)

# Add a new row to the DataFrame
df = df.append({'A': 6, 'B': 'k', 'C': 6.6, 'D': 'l'}, ignore_index=True)
print(df)

   A  B    C
0  1  a  1.1
1  2  b  2.2
2  3  c  3.3
3  4  d  4.4
4  5  e  5.5
0    1
1    2
2    3
3    4
4    5
Name: A, dtype: int64
A      1
B      a
C    1.1
Name: 0, dtype: object
a
    A  B    C
0   2  a  1.1
1   4  b  2.2
2   6  c  3.3
3   8  d  4.4
4  10  e  5.5
    A  B    C  D
0   2  a  1.1  f
1   4  b  2.2  g
2   6  c  3.3  h
3   8  d  4.4  i
4  10  e  5.5  j
    A  B    C  D
0   2  a  1.1  f
1   4  b  2.2  g
2   6  c  3.3  h
3   8  d  4.4  i
4  10  e  5.5  j
5   6  k  6.6  l


  df = df.append({'A': 6, 'B': 'k', 'C': 6.6, 'D': 'l'}, ignore_index=True)


# Reading Data from Different Sources
Read data from different sources like CSV, Excel, SQL databases, etc., using Pandas.

In [4]:
# Read data from a CSV file
df_csv = pd.read_csv('data.csv')

# Display the DataFrame
print(df_csv)

# Read data from an Excel file
df_excel = pd.read_excel('data.xlsx')

# Display the DataFrame
print(df_excel)

# Read data from a SQL database
import sqlite3

# Create a connection to the database
connection = sqlite3.connect('data.db')

# Read data from the database
df_sql = pd.read_sql_query('SELECT * FROM table_name', connection)

# Display the DataFrame
print(df_sql)

# Close the connection to the database
connection.close()

FileNotFoundError: [Errno 2] No such file or directory: 'data.csv'

# Data Cleaning with Pandas
Clean data by handling missing values and outliers, converting data types, renaming columns, etc.

In [None]:
# Handle missing values
df['A'].fillna(df['A'].mean(), inplace=True)  # Fill missing values in column 'A' with the mean of column 'A'
df.dropna(inplace=True)  # Drop rows with missing values

# Handle outliers
Q1 = df['A'].quantile(0.25)  # Calculate the first quartile
Q3 = df['A'].quantile(0.75)  # Calculate the third quartile
IQR = Q3 - Q1  # Calculate the interquartile range
filter = (df['A'] >= Q1 - 1.5 * IQR) & (df['A'] <= Q3 + 1.5 * IQR)  # Define a filter for values within the range
df = df.loc[filter]  # Filter the DataFrame

# Convert data types
df['A'] = df['A'].astype('int')  # Convert column 'A' to integer

# Rename columns
df.rename(columns={'A': 'column_A'}, inplace=True)  # Rename column 'A' to 'column_A'

# Reset index
df.reset_index(drop=True, inplace=True)  # Reset the index of the DataFrame

# Display the cleaned DataFrame
print(df)

# Data Manipulation with Pandas
Manipulate data by sorting, filtering, grouping, merging, reshaping, etc.

In [None]:
# Sorting the DataFrame
df_sorted = df.sort_values(by='column_A')  # Sort the DataFrame by column 'column_A'
print(df_sorted)

# Filtering the DataFrame
df_filtered = df[df['column_A'] > 2]  # Filter the DataFrame for rows where 'column_A' is greater than 2
print(df_filtered)

# Grouping the DataFrame
df_grouped = df.groupby('column_A').mean()  # Group the DataFrame by 'column_A' and calculate the mean of the other columns
print(df_grouped)

# Merging DataFrames
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df2 = pd.DataFrame({'A': [1, 2, 3], 'C': [7, 8, 9]})
df_merged = pd.merge(df1, df2, on='A')  # Merge df1 and df2 on column 'A'
print(df_merged)

# Reshaping the DataFrame
df_pivot = df.pivot(index='column_A', columns='B', values='C')  # Pivot the DataFrame
print(df_pivot)

# Applying functions to the DataFrame
df['column_A'] = df['column_A'].apply(lambda x: x**2)  # Apply a function to square the values in column 'column_A'
print(df)

# Data Visualization with Pandas
Visualize data using the plot() function in Pandas.

In [None]:
# Import the matplotlib library for plotting
import matplotlib.pyplot as plt

# Plot a histogram of 'column_A'
df['column_A'].plot(kind='hist', rwidth=0.8)
plt.show()

# Plot a bar chart of 'B'
df['B'].value_counts().plot(kind='bar')
plt.show()

# Plot a line chart of 'C'
df['C'].plot(kind='line')
plt.show()

# Plot a boxplot of 'column_A'
df['column_A'].plot(kind='box')
plt.show()

# Plot a scatter plot of 'column_A' and 'C'
df.plot(kind='scatter', x='column_A', y='C')
plt.show()

# Time Series Analysis with Pandas
Perform time series analysis by resampling, shifting, rolling, etc.

In [None]:
# Create a date range
date_range = pd.date_range(start='1/1/2020', end='1/10/2020')
print(date_range)

# Create a time series DataFrame
df_time = pd.DataFrame(date_range, columns=['date'])
df_time['data'] = np.random.randint(0,100,size=(len(date_range)))
print(df_time)

# Set the date column as the index
df_time.set_index('date', inplace=True)
print(df_time)

# Resample the time series data by day
df_resampled_day = df_time.resample('D').mean()
print(df_resampled_day)

# Resample the time series data by week
df_resampled_week = df_time.resample('W').mean()
print(df_resampled_week)

# Shift the time series data
df_shifted = df_time.shift(1)
print(df_shifted)

# Calculate the difference in the time series data
df_diff = df_time.diff()
print(df_diff)

# Apply a rolling window function to the time series data
df_rolling = df_time.rolling(window=3).mean()
print(df_rolling)