## Basics of Pandas in Python


Pandas is an open-source data manipulation and analysis library for Python. It provides data structures such as `Series` (a one-dimensional labelled array) and `DataFrame` (a two-dimensional labelled table), which make it easy to work with structured data.
    

In [1]:
import pandas as pd

# Build a Series in two equivalent ways.
# Option 1: pass the values and the index labels separately
s = pd.Series([10, 20, 30, 40], index=list('abcd'))
# Option 2: pass a dict -- its keys become the index labels
# (this rebinds `s` to a Series with the same content)
s = pd.Series(dict(a=10, b=20, c=30, d=40))
print(s)

# Build a DataFrame from a dict: each key becomes a column name,
# each list supplies that column's values
df = pd.DataFrame(
    {
        'Name': ['John', 'Anna', 'Peter', 'Linda'],
        'Age': [28, 24, 35, 32],
        'City': ['New York', 'Paris', 'Berlin', 'London'],
    }
)
print(df)
# Selecting a single column by name yields a Series
print(df.loc[:, 'Name'])
# To view the same data as a NumPy array instead:
# import numpy as np
# print(np.array(df))

a    10
b    20
c    30
d    40
dtype: int64
    Name  Age      City
0   John   28  New York
1   Anna   24     Paris
2  Peter   35    Berlin
3  Linda   32    London
0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object


## Data Indexing & Manipulation

In [None]:

# Label-based lookup: .loc[0] returns the row whose index label is 0
print(df.loc[0], end='\n\n')
# Position-based lookup: .iloc[0] returns the first row
# (labels and positions coincide here, so the output matches the one above)
print(df.iloc[0], end='\n\n')
# Single-cell access uses square brackets, not parentheses:
#   df.at[1, 'Name']  (by label)   /   df.iat[1, 0]  (by position)

# Add a new column (the list must hold one value per existing row)
df['Salary'] = [3, 4, 4, 5]
# drop() removes rows by index label by default (axis=0);
# to remove a column, pass columns=... (equivalently axis=1).
# It returns a new DataFrame and leaves `df` itself unchanged, so the
# result of this call is discarded on purpose. To make the change
# permanent, pass inplace=True.
df.drop(columns='Salary')
print(df, end='\n\n')  # 'Salary' is still present

# Keep only the rows where Age is greater than 30
print(df.loc[df['Age'] > 30], end='\n\n')
# Show the rows ordered by Age (returns a sorted copy; df is not reordered)
print(df.sort_values('Age'), end='\n\n')

# --- Missing data ---
# Flag missing values: True marks a missing cell
print(df.isna(), end='\n\n')
print(df)

# Fill missing values with a default value
# df.fillna(0)

# Remove missing values
# df.dropna()

    

Name        John
Age           28
City    New York
Name: 0, dtype: object

Name        John
Age           28
City    New York
Name: 0, dtype: object

    Name  Age      City  Salary
0   John   28  New York       3
1   Anna   24     Paris       4
2  Peter   35    Berlin       4
3  Linda   32    London       5

    Name  Age    City  Salary
2  Peter   35  Berlin       4
3  Linda   32  London       5

    Name  Age      City  Salary
1   Anna   24     Paris       4
0   John   28  New York       3
3  Linda   32    London       5
2  Peter   35    Berlin       4

    Name    Age   City  Salary
0  False  False  False   False
1  False  False  False   False
2  False  False  False   False
3  False  False  False   False

    Name  Age      City  Salary
0   John   28  New York       3
1   Anna   24     Paris       4
2  Peter   35    Berlin       4
3  Linda   32    London       5


## Basic Statistics

In [3]:

# Mean of a single column (swap .mean() for .sum() to get the total instead)
age_column = df['Age']
print(age_column.mean())

# Summary statistics (count, mean, std, min, quartiles, max) for the DataFrame
summary = df.describe()
print(summary)

29.75
             Age    Salary
count   4.000000  4.000000
mean   29.750000  4.000000
std     4.787136  0.816497
min    24.000000  3.000000
25%    27.000000  3.750000
50%    30.000000  4.000000
75%    32.750000  4.250000
max    35.000000  5.000000


## Reading from and Writing to Files

In [4]:
# Pandas makes it easy to read from and write to different file formats,
# such as CSV, Excel, JSON, and more.
# The examples below are left commented out because they need files
# ('filename.csv') and columns ('Sales') that are not part of this notebook.

# Read data from a CSV file
# df = pd.read_csv('filename.csv')
# df.describe()
# df.head(5)  # first five rows
# df.tail(5)  # last five rows

# Write the DataFrame to a CSV file
# df.to_csv('output.csv', index=False)
# df.dtypes

# Fill missing values with the mean of the column
# df['Sales_fillNA'] = df['Sales'].fillna(df['Sales'].mean())
