In [37]:
import pandas as pd

# Load the orders dataset from the CSV file (path is relative to the working directory).
df = pd.read_csv("orders.csv")
# Preview the first rows to check that the file parsed as expected.
df.head()

Unnamed: 0,OrderID,CustomerName,Product,Category,Quantity,Price,OrderDate,Shipped,Country
0,1001,John Smith,Laptop,Electronics,1,1200.0,2024-06-01,Yes,USA
1,1002,Sarah Lee,Headphones,Electronics,2,150.0,2024-06-03,No,Canada
2,1003,Ali Khan,Office Chair,Furniture,1,300.0,2024-06-04,Yes,UAE
3,1004,Alice Wong,Desk Lamp,Furniture,3,45.0,2024-06-05,Yes,Singapore
4,1005,Carlos Mendez,Keyboard,Electronics,2,80.0,2024-06-06,No,Mexico


In [38]:
# Column labels of the DataFrame; df.index gives the row labels in the same way.
df.columns

Index(['OrderID', 'CustomerName', 'Product', 'Category', 'Quantity', 'Price',
       'OrderDate', 'Shipped', 'Country'],
      dtype='object')

In [39]:
# Preview the first rows of the DataFrame (head() shows 5 rows by default).
df.head()

Unnamed: 0,OrderID,CustomerName,Product,Category,Quantity,Price,OrderDate,Shipped,Country
0,1001,John Smith,Laptop,Electronics,1,1200.0,2024-06-01,Yes,USA
1,1002,Sarah Lee,Headphones,Electronics,2,150.0,2024-06-03,No,Canada
2,1003,Ali Khan,Office Chair,Furniture,1,300.0,2024-06-04,Yes,UAE
3,1004,Alice Wong,Desk Lamp,Furniture,3,45.0,2024-06-05,Yes,Singapore
4,1005,Carlos Mendez,Keyboard,Electronics,2,80.0,2024-06-06,No,Mexico


In [40]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
# df.info() is the companion call: it lists every column with its dtype and non-null count.
df.describe()

Unnamed: 0,OrderID,Quantity,Price
count,40.0,40.0,40.0
mean,1020.5,5.45,106.4575
std,11.690452,15.903475,201.091854
min,1001.0,1.0,0.8
25%,1010.75,1.0,18.0
50%,1020.5,1.0,43.5
75%,1030.25,2.25,112.5
max,1040.0,100.0,1200.0


### Indexing By row & column

In [41]:
# Number of entries in the Country column (one per row).
# To see the distinct countries instead, use set(df['Country']).
len(df['Country'])

40

In [42]:
# Select several columns at once by passing a list of column names; the result is a DataFrame.
df[['Country','Product']]

Unnamed: 0,Country,Product
0,USA,Laptop
1,Canada,Headphones
2,UAE,Office Chair
3,Singapore,Desk Lamp
4,Mexico,Keyboard
5,UAE,Notebook
6,UK,Standing Desk
7,South Korea,Monitor
8,Italy,Pen
9,Japan,Mouse


In [43]:
# Indexing by row position: take the row at position 10, then read its Country value.
df.iloc[10]["Country"]

'France'

In [44]:
# Values of the first row (position 0) collected into a list, in column order.
list(df.iloc[0])

[np.int64(1001),
 'John Smith',
 'Laptop',
 'Electronics',
 np.int64(1),
 np.float64(1200.0),
 '2024-06-01',
 'Yes',
 'USA']

## Filtering

In [45]:
# Combine two boolean masks with & to keep only Electronics orders from the USA.
df[df["Category"].eq("Electronics") & df["Country"].eq("USA")]

Unnamed: 0,OrderID,CustomerName,Product,Category,Quantity,Price,OrderDate,Shipped,Country
0,1001,John Smith,Laptop,Electronics,1,1200.0,2024-06-01,Yes,USA


In [46]:
# Boolean-mask filter: keep rows whose Quantity is greater than 20.
# Any comparison operator can be used here (>, <, ==, !=, <=, >=).
df[df["Quantity"] > 20]

Unnamed: 0,OrderID,CustomerName,Product,Category,Quantity,Price,OrderDate,Shipped,Country
33,1034,Alex Johnson,Index Cards,Stationery,100,0.8,2024-07-05,Yes,USA


In [47]:
# Keep rows whose CustomerName starts with 'Ann'; .str.endswith() is the suffix counterpart.
df[df['CustomerName'].str.startswith('Ann')]

Unnamed: 0,OrderID,CustomerName,Product,Category,Quantity,Price,OrderDate,Shipped,Country
16,1017,Anna Kowalski,Wireless Mouse,Electronics,2,45.0,2024-06-18,Yes,Poland
39,1040,Anna Ivanova,Desk Clock,Furniture,1,35.0,2024-07-11,Yes,Ukraine


In [48]:
# isin() marks rows whose Country is in the list; ~ negates the mask,
# so this keeps every row whose Country is NOT one of the listed values.
df[~df["Country"].isin(["USA", "Sweden", "Brazil"])]

Unnamed: 0,OrderID,CustomerName,Product,Category,Quantity,Price,OrderDate,Shipped,Country
1,1002,Sarah Lee,Headphones,Electronics,2,150.0,2024-06-03,No,Canada
2,1003,Ali Khan,Office Chair,Furniture,1,300.0,2024-06-04,Yes,UAE
3,1004,Alice Wong,Desk Lamp,Furniture,3,45.0,2024-06-05,Yes,Singapore
4,1005,Carlos Mendez,Keyboard,Electronics,2,80.0,2024-06-06,No,Mexico
5,1006,Nora Ibrahim,Notebook,Stationery,5,5.0,2024-06-07,Yes,UAE
6,1007,Emily Davis,Standing Desk,Furniture,1,450.0,2024-06-08,Yes,UK
7,1008,James Park,Monitor,Electronics,2,220.0,2024-06-09,No,South Korea
8,1009,Maria Rossi,Pen,Stationery,10,1.5,2024-06-10,Yes,Italy
9,1010,Takeshi Nakamura,Mouse,Electronics,1,35.0,2024-06-11,Yes,Japan
10,1011,Sophie Martin,Webcam,Electronics,1,85.0,2024-06-12,Yes,France


### UPDATING DATA

In [None]:
# Replace the value "USA" with "United States" in the Country column.
# Only rows matching the condition are changed; all other rows keep their value.
df.loc[df["Country"] == "USA", "Country"] = "United States"

In [None]:
# Update a single cell: set Product to 'Chair' for the customer named 'Anna Ivanova'.
# Assignment needs a single '='; using '==' here would only compare the values
# and leave df unchanged.
df.loc[df['CustomerName'] == 'Anna Ivanova', 'Product'] = 'Chair'

In [None]:
# Convert every value in the Country column to uppercase.
df["Country"] = df["Country"].str.upper()

### Deleting data

In [None]:
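# Remove a row by its index label. drop() returns a new DataFrame and leaves df unchanged:
#   df.drop(39)
# To make the removal permanent, assign the result back to df:
#   df = df.drop(39)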

### CLEANING DATA

In [None]:
# inplace=True modifies the existing df; without it each call returns a new
# DataFrame and df itself is left unchanged.
# Fill missing OrderID values first: once dropna() has removed every row that
# contains a NaN, there would be nothing left for fillna() to fill.
df.fillna({'OrderID': 0}, inplace=True)
# Then drop the rows that still have a missing value in any other column.
df.dropna(inplace=True)

In [53]:
# Rename the OrderID column to "Order ID", modifying df in place.
df.rename(columns={"OrderID": "Order ID"}, inplace=True)

### ANALYZING DATA

In [74]:
# Count how many rows each country has, most frequent first.
df['Country'].value_counts()

Country
UNITED STATES    3
SOUTH KOREA      3
JAPAN            3
CANADA           2
FRANCE           2
UAE              2
ITALY            2
UK               2
GERMANY          2
SINGAPORE        1
INDIA            1
MEXICO           1
AUSTRALIA        1
SPAIN            1
EGYPT            1
CHINA            1
POLAND           1
SAUDI ARABIA     1
BRAZIL           1
PAKISTAN         1
SWEDEN           1
ARGENTINA        1
RUSSIA           1
MOROCCO          1
COLOMBIA         1
PORTUGAL         1
IRAN             1
UKRAINE          1
Name: count, dtype: int64

In [77]:
# Group the rows by Country and sum the Price column within each group.
# NOTE(review): this adds up unit prices and ignores Quantity — confirm whether
# an order total (Price * Quantity) was intended.
df.groupby('Country')['Price'].sum()

Country
ARGENTINA          22.0
AUSTRALIA          12.0
BRAZIL             95.0
CANADA            153.5
CHINA             280.0
COLOMBIA           42.0
EGYPT               8.0
FRANCE            160.0
GERMANY           175.0
INDIA             180.0
IRAN               28.0
ITALY              19.5
JAPAN             105.0
MEXICO             80.0
MOROCCO            32.0
PAKISTAN            4.0
POLAND             45.0
PORTUGAL          125.0
RUSSIA              2.5
SAUDI ARABIA      120.0
SINGAPORE          45.0
SOUTH KOREA       348.0
SPAIN              25.0
SWEDEN             28.0
UAE               305.0
UK                518.0
UKRAINE            35.0
UNITED STATES    1265.8
Name: Price, dtype: float64

In [78]:
# Order the rows from cheapest to most expensive (sort_values is ascending by default).
df.sort_values(by="Price")

Unnamed: 0,Order ID,CustomerName,Product,Category,Quantity,Price,OrderDate,Shipped,Country
33,1034,Alex Johnson,Index Cards,Stationery,100,0.8,2024-07-05,Yes,UNITED STATES
8,1009,Maria Rossi,Pen,Stationery,10,1.5,2024-06-10,Yes,ITALY
27,1028,Nina Petrov,Binder Clips,Stationery,12,2.5,2024-06-29,Yes,RUSSIA
20,1021,David Thompson,Highlighters,Stationery,8,3.5,2024-06-22,No,CANADA
23,1024,Zara Ahmed,Sticky Notes,Stationery,15,4.0,2024-06-25,No,PAKISTAN
5,1006,Nora Ibrahim,Notebook,Stationery,5,5.0,2024-06-07,Yes,UAE
15,1016,Ahmed Hassan,Printer Paper,Stationery,20,8.0,2024-06-17,Yes,EGYPT
12,1013,Emma Wilson,Stapler,Stationery,3,12.0,2024-06-14,Yes,AUSTRALIA
18,1019,Yuki Tanaka,Calculator,Electronics,1,15.0,2024-06-20,Yes,JAPAN
29,1030,Giuseppe Romano,Desk Calendar,Furniture,1,18.0,2024-07-01,No,ITALY


In [None]:
# Save the modified DataFrame to a new CSV file.
# index=False leaves the row index out of the written file.
df.to_csv("new_file.csv", index=False)