### Day 5 - Introduction to Pandas (Series & DataFrames)

In [33]:
import os

import pandas as pd

##### 1. Creating a Pandas Series

In [34]:
# Build a Series from a plain Python list; with no index supplied,
# pandas labels the values with the default integer index.
data = [10, 20, 30, 40, 50]
series = pd.Series(data)

print("Pandas Series:", series, sep="\n")

Pandas Series:
0    10
1    20
2    30
3    40
4    50
dtype: int64


In [35]:
# Custom index: explicit labels replace the default integer index
series2 = pd.Series(
    data=[100, 200, 300],
    index=['a', 'b', 'c'],
)
print("\nSeries with custom index:", series2, sep="\n")


Series with custom index:
a    100
b    200
c    300
dtype: int64


##### 2. Creating a DataFrame

In [36]:
# Column-oriented input: every key becomes a column name and every list
# supplies that column's values, one entry per row.
data = dict(
    Name=['Amit', 'Riya', 'John', 'Sara'],
    Age=[25, 22, 29, 24],
    City=['Delhi', 'Mumbai', 'New York', 'London'],
)

df = pd.DataFrame(data)
print("\nDataFrame:", df, sep="\n")


DataFrame:
   Name  Age      City
0  Amit   25     Delhi
1  Riya   22    Mumbai
2  John   29  New York
3  Sara   24    London


##### 3. Reading Data from CSV

In [37]:
# Location of the 50_Startups dataset. The original local path stays as the default,
# but setting the STARTUPS_CSV environment variable redirects the read without
# editing the notebook (the default path only exists on the author's machine).
startups_csv_path = os.environ.get(
    "STARTUPS_CSV",
    r'C:\Users\hp\OneDrive\Documents\ml datasets\50_Startups.csv',
)

# Load the CSV into a DataFrame and preview the first five rows.
df_csv = pd.read_csv(startups_csv_path)
print("\nCSV Data:")
print(df_csv.head())


CSV Data:
   R&D Spend  Administration  Marketing Spend       State     Profit
0  165349.20       136897.80        471784.10    New York  192261.83
1  162597.70       151377.59        443898.53  California  191792.06
2  153441.51       101145.55        407934.54     Florida  191050.39
3  144372.41       118671.85        383199.62    New York  182901.99
4  142107.34        91391.77        366168.42     Florida  166187.94


##### 4. Basic DataFrame Operations

In [38]:
# head(n) returns the first n rows of the frame
print("\nFirst 2 rows:", df.head(2), sep="\n")


First 2 rows:
   Name  Age    City
0  Amit   25   Delhi
1  Riya   22  Mumbai


In [39]:
# tail(n) returns the last n rows of the frame
print("\nLast 2 rows:", df.tail(2), sep="\n")


Last 2 rows:
   Name  Age      City
2  John   29  New York
3  Sara   24    London


In [40]:
# DataFrame.info() writes its summary directly to stdout and returns None,
# so it is called on its own; wrapping it in print() would append a stray
# "None" line after the summary.
print("\nSummary info:")
df.info()


Summary info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   City    4 non-null      object
dtypes: int64(1), object(2)
memory usage: 228.0+ bytes
None


In [41]:
# describe() reports count, mean, std, min, quartiles and max;
# by default only numeric columns are summarised.
print("\nStatistics:", df.describe(), sep="\n")


Statistics:
            Age
count   4.00000
mean   25.00000
std     2.94392
min    22.00000
25%    23.50000
50%    24.50000
75%    26.00000
max    29.00000


##### 5. Selecting Data

In [42]:
# Indexing with a single column label yields that column as a Series
print("\nSelect 'Name' column:", df['Name'], sep="\n")


Select 'Name' column:
0    Amit
1    Riya
2    John
3    Sara
Name: Name, dtype: object


In [43]:
# Indexing with a list of labels yields a DataFrame holding just those columns
print("\nSelect multiple columns:", df[['Name', 'City']], sep="\n")


Select multiple columns:
   Name      City
0  Amit     Delhi
1  Riya    Mumbai
2  John  New York
3  Sara    London


In [44]:
# .loc looks rows up by index *label*; the row comes back as a Series
# whose entries are keyed by column name.
print("\nSelect row by index (loc):", df.loc[1], sep="\n")


Select row by index (loc):
Name      Riya
Age         22
City    Mumbai
Name: 1, dtype: object


In [45]:
# .iloc looks rows up by zero-based integer *position*, regardless of the index labels
print("\nSelect row by position (iloc):", df.iloc[2], sep="\n")


Select row by position (iloc):
Name        John
Age           29
City    New York
Name: 2, dtype: object


##### 6. Adding a New Column


In [46]:
# Assigning a list to a new column label appends that column to df in place;
# the list supplies one value per existing row, in row order.
df['Score'] = [88, 92, 75, 95]
print("\nDataFrame after adding 'Score':", df, sep="\n")


DataFrame after adding 'Score':
   Name  Age      City  Score
0  Amit   25     Delhi     88
1  Riya   22    Mumbai     92
2  John   29  New York     75
3  Sara   24    London     95


##### 7. Filtering Data

In [47]:
# Boolean-mask filtering: the comparison yields one True/False per row,
# and indexing df with that mask keeps only the True rows.
is_over_24 = df['Age'] > 24
print("\nPeople with Age > 24:", df[is_over_24], sep="\n")


People with Age > 24:
   Name  Age      City  Score
0  Amit   25     Delhi     88
2  John   29  New York     75
