In [41]:
import pandas as pd  #import pandas 

Creating DataFrames:
    
1. **pd.DataFrame(data, columns)**: Create a DataFrame from data like a list, NumPy array, or dictionary.
2. **pd.read_csv(), pd.read_excel()**: Read data from external files.
3. **pd.DataFrame.from_dict()**: Create a DataFrame from a dictionary.

In [54]:
#Creating a Sample DataFrame
data = {
    "Name" : ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    "Age" : [25, 30, 35, 40, 45],
    "Salary" : [50000, 60000, 70000, 80000, 90000]
}
df = pd.DataFrame(data)
print(df)

      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000
3    David   40   80000
4      Eva   45   90000


In [35]:
df

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,60000
2,Charlie,35,70000
3,David,40,80000
4,Eva,45,90000


Data Exploration and Inspection:
    
1. df.head(n) and df.tail(n): Display the first/last n rows of the DataFrame.
2. df.info(): Provide information about the DataFrame, including data types and non-null counts.
3. df.describe(): Generate summary statistics for numeric columns.
4. df.shape: Get the dimensions (number of rows and columns) of the DataFrame.
5. df.columns and df.index: Access column and index information.
6. df.nunique(): Count the number of unique values in each column.

In [6]:
# Data Viewing and Inspection
df.head(2) #Display the first n rows of the DataFrame.

Unnamed: 0,Name,Age,Salary
0,Alice,25,50000
1,Bob,30,60000


In [37]:
df.tail(2)  #Display the last n rows of the DataFrame.

Unnamed: 0,Name,Age,Salary
3,David,40,80000
4,Eva,45,90000


In [40]:
df.info()  #Provide information about the DataFrame, including data types and non-null counts.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    5 non-null      object
 1   Age     5 non-null      int64 
 2   Salary  5 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 248.0+ bytes


In [9]:
df.shape  #Get the dimensions (number of rows and columns) of the DataFrame.

(5, 3)

In [14]:
print(df.describe()) # Display summary statistics for numeric columns

             Age        Salary
count   5.000000      5.000000
mean   35.000000  70000.000000
std     7.905694  15811.388301
min    25.000000  50000.000000
25%    30.000000  60000.000000
50%    35.000000  70000.000000
75%    40.000000  80000.000000
max    45.000000  90000.000000


In [38]:
df.columns  #Access column information.

Index(['Name', 'Age', 'Salary'], dtype='object')

In [39]:
df.index #Access index information.

RangeIndex(start=0, stop=5, step=1)

In [48]:
df.nunique()  # Count the number of unique values in each column.

Name      5
Age       5
Salary    5
dtype: int64

***Data Selection and Indexing:***
    
1. df['column_name']:   Select a single column.
2. df[['col1', 'col2']]:   Select multiple columns.
3. df.loc[]:     Select rows and columns by labels.
4. df.iloc[]:      Select rows and columns by integer positions.
5. df.at[] and df.iat[]:     Access specific elements by label or position.

In [50]:
df['Name'] # Select a single column

0      Alice
1        Bob
2    Charlie
3      David
4        Eva
Name: Name, dtype: object

In [20]:
print(df[['Name','Age']]) # Select 'Name' and 'Age' columns

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40
4      Eva   45


In [22]:
# Select the row with index 2

df.loc[2]

Name      Charlie
Age            35
Salary      70000
Name: 2, dtype: object

In [51]:
#Select a single row and single column by integer position
df.iloc[2,1] #Selects the element in the 3rd row (index 2) and 2nd column (index 1)

35

In [24]:
# Select rows 1 to 3 and columns 'Name' and 'Salary'
df.loc[1:3,['Name', 'Salary']]

Unnamed: 0,Name,Salary
1,Bob,60000
2,Charlie,70000
3,David,80000


In [60]:
#Select all rows for specific columns by integer positions

df.iloc[:,[0,2]] # Select all rows for columns 1 and 3 (index 0 and 2)

Unnamed: 0,Name,Salary
0,Alice,50000
1,Bob,60000
2,Charlie,70000
3,David,80000
4,Eva,90000


In [65]:
# Creating a sample DataFrame with labels

data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9]
}
df = pd.DataFrame(data, index=['X', 'Y', 'Z'])
df

Unnamed: 0,A,B,C
X,1,4,7
Y,2,5,8
Z,3,6,9


In [67]:
# Example 1: Using df.at[] for label-based indexin
value = df.at['Y', 'B']
value

5

Data Filtering and Subsetting:
    
1. df[df['column_name'] > value]: Filter rows based on a condition.
2. df.query('condition'): Filter rows using a query string.

In [27]:
# Data Filtering
# Filter rows where Age is greater than 30

fd = df[df['Age']>30]
fd

Unnamed: 0,Name,Age,Salary
2,Charlie,35,70000
3,David,40,80000
4,Eva,45,90000


In [None]:
df.query('

In [31]:
# Data Sorting
# Sort by 'Age' in descending order

sort_Data = df.groupby('Age')
print(sort_Data)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fdb036a9360>


In [61]:
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9]
}
df = pd.DataFrame(data, index=['X', 'Y', 'Z'])
df

Unnamed: 0,A,B,C
X,1,4,7
Y,2,5,8
Z,3,6,9


In [62]:
value_iat = df.iat[1, 2]  # Access the element at row index 1 and column index 2
print("Value using df.iat[]:", value_iat) 

Value using df.iat[]: 8
