In [1]:
# Getting Started with Pandas

# What is Pandas?
# Pandas is a powerful, open-source Python library used for data manipulation, cleaning, and analysis. It provides two main data structures:

# Series: A one-dimensional labeled array
# DataFrame: A two-dimensional labeled table (like an Excel sheet or SQL table)

# Pandas makes working with structured data fast, expressive, and flexible.
# If you're working with tables, spreadsheets, or CSVs in Python—Pandas is your best friend.

In [2]:
# Why Use Pandas?


# Task	                  Without Pandas	            With Pandas

# Load a CSV	              open() + loops	            pd.read_csv()
# Filter rows	              Custom loop logic	            df[df["col"] > 5]
# Group & summarize	      Manual aggregation	        df.groupby()
# Merge two datasets	      Nested loops	                pd.merge()

# Pandas saves time, reduces code, and increases readability.

In [3]:
# Installing Pandas
# Install via pip3:
# pip3 install pandas

# Or using conda (recommended if you're using Anaconda)
# conda install pandas

In [4]:
# Importing Pandas
# import pandas as pd

# pd is the standard alias used by the data science community.

In [1]:
# Pandas vs Excel vs SQL vs NumPy - 

    
# Tool	                         Strengths	                              Weaknesses
    
# Excel	                         Easy UI, great for small data	          Slow, manual, not scalable
# SQL	                             Efficient querying of big data	          Not ideal for transformation logic
# NumPy	                         Fast, low-level array operations	      No labels, harder for tabular data
# Pandas	                         Label-aware, fast, flexible	          Slightly steep learning curve


# Pandas bridges the gap between NumPy performance and Excel-like usability. Pandas is built on top of NumPy.
# END.

In [2]:
# Pandas is a fundamental Python library extensively used for data manipulation and analysis. 
# It provides powerful and flexible data structures, primarily Series and DataFrame, to efficiently
# handle and process structured data.
    
# Key Uses of Pandas:
    
# USE - 1 . Data Loading and Saving: 
# Pandas can read and write data from various formats, including CSV, Excel, SQL databases, JSON, and more.
import pandas as pd

# Load the contents of 'data.csv' into a DataFrame bound to `df`
df = pd.read_csv(filepath_or_buffer='data.csv')

# Write that DataFrame out as 'output.xlsx'; index=False leaves the row index
# out of the sheet.
# NOTE(review): writing .xlsx needs an Excel engine (e.g. openpyxl) installed
# alongside pandas - confirm it is available in the target environment.
df.to_excel(excel_writer='output.xlsx', index=False)

In [3]:
# USE - 2. Data Cleaning and Preparation: 
# It offers functionalities to handle missing values 
# (e.g., fillna(), dropna()), remove duplicates (drop_duplicates()), and correct data types.


# Fill missing values with the mean of the column.
# The filled Series is assigned back to the column rather than calling
# fillna(inplace=True) on the df['column_name'] selection: an in-place call on
# a selected column is chained assignment, which warns on recent pandas and
# does not update `df` at all once Copy-on-Write is enabled.
df['column_name'] = df['column_name'].fillna(df['column_name'].mean())

# Remove rows with any missing values.
# Rebinding `df` to the returned frame avoids inplace=True.
df = df.dropna()

In [4]:
# USE - 3. Data Exploration and Analysis: 
# Pandas enables descriptive statistics (describe()), filtering and selecting data 
# based on conditions, grouping data (groupby()), and performing aggregations.

# Print the summary-statistics table that describe() builds for `df`
print(df.describe())

# Keep only the rows whose 'Age' value is strictly greater than 30
filtered_df = df.loc[df['Age'].gt(30)]

In [5]:
# USE - 4. Data Manipulation and Transformation: It allows for merging, joining, and
# concatenating DataFrames, reshaping data (e.g., pivot_table(), melt()), and applying
# functions to columns or rows.

# Combine two DataFrames on their shared 'common_column' key.
# NOTE(review): df1 and df2 are not assigned in any cell visible in this
# notebook - they must be defined before this cell can run on a fresh kernel.
merged_df = pd.merge(left=df1, right=df2, on='common_column')

# Derive 'new_column' as twice the value of the existing 'old_column'
df['new_column'] = df['old_column'].mul(2)

In [None]:
# Summary - 

# Pandas is a cornerstone for data-centric tasks in Python, providing a robust and intuitive
# framework for handling, cleaning, exploring, and manipulating data efficiently.