📦 Installation
pip install pandas
pip install --upgrade pandas


✅ Pandas Series
import pandas as pd

s = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
print(s)
Output:

Copy code
a    10
b    20
c    30
dtype: int64


✅ Pandas DataFrame

# Mixed-type columns: let pandas infer a dtype per column. Forcing
# dtype='int64' on the whole frame fails, because the 'Name' strings
# cannot be cast to integers.
data = {'Name': ['Alice', 'Bob'], 'Age': [25, 30]}
df = pd.DataFrame(data, index=['r1', 'r2'])
print(df)
Output:

markdown
Copy code
     Name  Age
r1  Alice   25
r2    Bob   30
📏 Shape
python
Copy code
print(df.shape)
Output:

scss
Copy code
(2, 2)


👀 Head & Tail
python
Copy code
print(df.head(1))
print(df.tail(1))
Output:

pgsql
Copy code
     Name  Age
r1  Alice   25
     Name  Age
r2    Bob   30


🔁 Convert to NumPy array
python
Copy code
print(df.values)
Output:

lua
Copy code
[['Alice' 25]
 ['Bob' 30]]
📄 Columns
python
Copy code
print(df.columns)
Output:

pgsql
Copy code
Index(['Name', 'Age'], dtype='object')


💾 To CSV / Read CSV
python
Copy code
df.to_csv("data.csv", index=False)
df2 = pd.read_csv("data.csv")
print(df2)
Output:

markdown
Copy code
    Name  Age
0  Alice   25
1    Bob   30
🧹 Drop Columns
python
Copy code
print(df.drop(columns=['Age']))
Output:

markdown
Copy code
     Name
r1  Alice
r2    Bob


📊 Describe & Info
python
Copy code
print(df.describe())
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the report.
df.info()
Output:

pgsql
Copy code
             Age
count   2.000000
mean   27.500000
std     3.535534
min    25.000000
25%    26.250000
50%    27.500000
75%    28.750000
max    30.000000
<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, r1 to r2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Name    2 non-null      object
 1   Age     2 non-null      int64
🔍 iloc and loc


python
Copy code
print(df.iloc[0])
print(df.loc['r2'])
Output:

pgsql
Copy code
Name    Alice
Age        25
Name: r1, dtype: object

Name    Bob
Age      30
Name: r2, dtype: object
🎯 Access Column / Indexing / Slicing
python
Copy code
print(df['Name'])       # Column
print(df[0:1])          # Slice row
print(df[['Name', 'Age']])  # Multi-column


🔢 columns.get_loc
python
Copy code
print(df.columns.get_loc('Age'))
Output:

Copy code
1
🎯 Fancy Indexing
python
Copy code
print(df[df['Age'] > 25])
Output:

markdown
Copy code
    Name  Age
r2   Bob   30


🔁 Unique
python
Copy code
print(df['Age'].unique())
Output:

csharp
Copy code
[25 30]


📊 Sorting
python
Copy code
print(df.sort_values(by='Age', ascending=False))
Output:

markdown
Copy code
     Name  Age
r2    Bob   30
r1  Alice   25


🔢 get_dummies
python
Copy code
df3 = pd.DataFrame({'Gender': ['M', 'F', 'F']})
# pandas >= 2.0 returns boolean (True/False) indicator columns by default;
# dtype=int requests the 0/1 encoding.
dummies = pd.get_dummies(df3, columns=['Gender'], dtype=int)
print(dummies)
Output:

nginx
Copy code
   Gender_F  Gender_M
0         0         1
1         1         0
2         1         0


🔁 groupby
python
Copy code
df = pd.DataFrame({
    'Team': ['A', 'A', 'B', 'B'],
    'Score': [10, 20, 15, 30]
})

print(df.groupby('Team')['Score'].mean())

# Iteration
for name, group in df.groupby('Team'):
    print(name)
    print(group)
Output:

less
Copy code
Team
A    15.0
B    22.5
Name: Score, dtype: float64
A
  Team  Score
0    A     10
1    A     20
B
  Team  Score
2    B     15
3    B     30


🔄 Pivot & Grouper
python
Copy code
# Small sales log: one row per (date, item) sale.
df = pd.DataFrame({
    'Date': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-02']),
    'Item': ['Apple', 'Banana', 'Apple'],
    'Sales': [100, 150, 200]
})

# Reshape long -> wide: one row per date, one column per item.
print(df.pivot(index='Date', columns='Item', values='Sales'))

# Grouper by day: aggregate only the numeric 'Sales' column. Summing the
# whole frame would also try to "sum" the 'Item' strings (string
# concatenation in pandas 2.x), which is not a meaningful total.
daily_sales = df.groupby(pd.Grouper(key='Date', freq='D'))['Sales'].sum()
print(daily_sales)
Output (pivot):

yaml
Copy code
Item        Apple  Banana
Date                     
2023-01-01  100.0   150.0
2023-01-02  200.0     NaN
🔗 Concat
python
Copy code
df1 = pd.DataFrame({'A': [1, 2]})
df2 = pd.DataFrame({'A': [3, 4]})
print(pd.concat([df1, df2], ignore_index=True))
Output:

css
Copy code
   A
0  1
1  2
2  3
3  4
🔗 Merge
python
Copy code
df1 = pd.DataFrame({'id': [1, 2], 'name': ['A', 'B']})
df2 = pd.DataFrame({'id': [2, 3], 'marks': [90, 80]})
print(pd.merge(df1, df2, on='id', how='inner'))
Output:

css
Copy code
   id name  marks
0   2    B     90
🧹 dropna, fillna, interpolate
python
Copy code
df = pd.DataFrame({'a': [1, None, 3]})
print(df.dropna())
print(df.fillna(0))
print(df.interpolate())
Output:

css
Copy code
     a
0  1.0
2  3.0

     a
0  1.0
1  0.0
2  3.0

     a
0  1.0
1  2.0
2  3.0


📈 Plotting (after installing matplotlib)

pip install matplotlib

import matplotlib.pyplot as plt
df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
df.plot(x='x', y='y')
plt.show()

In [2]:
import pandas as pd
import numpy as np

In [None]:
print(pd.__version__)
pd.DF=pd.DataFrame

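# This line sets a new attribute DF on the imported pandas module that points
# to pd.DataFrame, so pd.DF can be used in place of pd.DataFrame.
# Caution: this modifies the shared module object for the whole session;
# a plain local alias (DF = pd.DataFrame) is the safer convention.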

2.3.0


In [7]:
l1 = [10,20,30,40,50]
pds = pd.Series(l1,index = ['A','B','C','D','E'],dtype='int')
pds

A    10
B    20
C    30
D    40
E    50
dtype: int64

In [8]:
type(pds)

pandas.core.series.Series

In [9]:
pds['B']

np.int64(20)

In [10]:
marks = {
    'Phy': [92,71,60,95,99],
    'Chem': [75,93,99,58,80],
    'Maths': [95,99,93,86,89]
}

In [11]:
marks

{'Phy': [92, 71, 60, 95, 99],
 'Chem': [75, 93, 99, 58, 80],
 'Maths': [95, 99, 93, 86, 89]}

In [12]:
df=pd.DF(marks,index=['A','B','C','D','E'])

In [13]:
df

Unnamed: 0,Phy,Chem,Maths
A,92,75,95
B,71,93,99
C,60,99,93
D,95,58,86
E,99,80,89


In [14]:
df=pd.DataFrame(marks)

In [15]:
df

Unnamed: 0,Phy,Chem,Maths
0,92,75,95
1,71,93,99
2,60,99,93
3,95,58,86
4,99,80,89


In [18]:
na=np.random.randint(1,100,(10,3))
na

array([[98, 80, 43],
       [43, 79, 60],
       [64, 62, 87],
       [29, 49, 11],
       [99, 20, 93],
       [15,  3,  5],
       [77, 72, 39],
       [73, 67, 68],
       [90, 87, 30],
       [18, 87, 54]], dtype=int32)

In [19]:
df2=pd.DataFrame(na)
df2

Unnamed: 0,0,1,2
0,98,80,43
1,43,79,60
2,64,62,87
3,29,49,11
4,99,20,93
5,15,3,5
6,77,72,39
7,73,67,68
8,90,87,30
9,18,87,54


In [21]:
df2=pd.DataFrame(na,columns=['phy', 'Chem','Maths'])

In [22]:
df2

Unnamed: 0,phy,Chem,Maths
0,98,80,43
1,43,79,60
2,64,62,87
3,29,49,11
4,99,20,93
5,15,3,5
6,77,72,39
7,73,67,68
8,90,87,30
9,18,87,54


In [23]:
df

Unnamed: 0,Phy,Chem,Maths
0,92,75,95
1,71,93,99
2,60,99,93
3,95,58,86
4,99,80,89


In [24]:
df.shape

(5, 3)

In [29]:
df2.head(2)

Unnamed: 0,phy,Chem,Maths
0,98,80,43
1,43,79,60


In [30]:
df2.head()

Unnamed: 0,phy,Chem,Maths
0,98,80,43
1,43,79,60
2,64,62,87
3,29,49,11
4,99,20,93


In [31]:
df2.tail(3)

Unnamed: 0,phy,Chem,Maths
7,73,67,68
8,90,87,30
9,18,87,54


In [32]:
df2.tail()

Unnamed: 0,phy,Chem,Maths
5,15,3,5
6,77,72,39
7,73,67,68
8,90,87,30
9,18,87,54


In [None]:
df.values
# The .values attribute returns the underlying NumPy array
# of a DataFrame — essentially stripping away the index and column labels.

array([[92, 75, 95],
       [71, 93, 99],
       [60, 99, 93],
       [95, 58, 86],
       [99, 80, 89]])

In [34]:
df.columns

Index(['Phy', 'Chem', 'Maths'], dtype='object')

In [35]:
df

Unnamed: 0,Phy,Chem,Maths
0,92,75,95
1,71,93,99
2,60,99,93
3,95,58,86
4,99,80,89


In [None]:
df.to_csv("Marks.csv")
# This line saves the DataFrame df to a CSV file named
#  Marks.csv in the current working directory.

In [None]:
ndf = pd.read_csv("Marks.csv",index_col='Unnamed: 0')
ndf

# ndf = pd.read_csv("Marks.csv", index_col='Unnamed: 0')
# This line reads the CSV file Marks.csv into a new DataFrame ndf, and uses the 'Unnamed: 0' column as the index.

# 🔍 Why 'Unnamed: 0'?
# When you save a DataFrame with index=True (which is the default) using:

# df.to_csv("Marks.csv")
# It adds the index as the first column, but without a column name. So, when you read it back, pandas names that column 'Unnamed: 0'.

# 

Unnamed: 0,Phy,Chem,Maths
0,92,75,95
1,71,93,99
2,60,99,93
3,95,58,86
4,99,80,89


In [None]:
df.to_csv("Marks.csv",sep='|',index = False)

# This line saves the DataFrame df to a CSV file named Marks.csv, using:

# | (pipe) as the separator instead of the default comma ,

# index=False to exclude the index column from the file



In [39]:
ndf = pd.read_csv("Marks.csv",sep='|')
ndf

Unnamed: 0,Phy,Chem,Maths
0,92,75,95
1,71,93,99
2,60,99,93
3,95,58,86
4,99,80,89


In [40]:
ndf.index = ['A','B','C','D','E']
ndf

Unnamed: 0,Phy,Chem,Maths
A,92,75,95
B,71,93,99
C,60,99,93
D,95,58,86
E,99,80,89


In [None]:
df.drop(columns=['Chem'])

# This line drops the column named 'Chem' from the DataFrame df.

# 🔍 Important Notes:
# Non-destructive by default: It returns a new DataFrame 
# without 'Chem', but df itself remains unchanged unless 
# you assign it or use inplace=True.

# Raises error if 'Chem' doesn't exist.

Unnamed: 0,Phy,Maths
0,92,95
1,71,99
2,60,93
3,95,86
4,99,89


In [42]:
df

Unnamed: 0,Phy,Chem,Maths
0,92,75,95
1,71,93,99
2,60,99,93
3,95,58,86
4,99,80,89


In [43]:
df2

Unnamed: 0,phy,Chem,Maths
0,98,80,43
1,43,79,60
2,64,62,87
3,29,49,11
4,99,20,93
5,15,3,5
6,77,72,39
7,73,67,68
8,90,87,30
9,18,87,54


In [None]:
df2.describe()

# 🔍 What it does:
# Generates summary statistics for numeric columns in DataFrame df2.

# Useful for quick inspection of data distribution, spread, and outliers.



Unnamed: 0,phy,Chem,Maths
count,10.0,10.0,10.0
mean,60.6,60.6,49.0
std,32.273828,28.640298,29.371188
min,15.0,3.0,5.0
25%,32.5,52.25,32.25
50%,68.5,69.5,48.5
75%,86.75,79.75,66.0
max,99.0,87.0,93.0
