# Creating a Simple DataFrame

In [2]:
import pandas as pd

# Column-oriented input: each key is a column label, each list holds that column's values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
)

# Accessing the DataFrame

In [3]:
# Turn the column dictionary into a tabular DataFrame.
df = pd.DataFrame(data=data)

# Show the frame as plain text.
print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


# Read CSV file

In [26]:
import pandas as pd

# Load the sales orders from the CSV file into a DataFrame.
# Note: this rebinds `df`, replacing the small Name/Age frame built earlier.
df = pd.read_csv(filepath_or_buffer='sales_data.csv')

# Viewing the Data

In [27]:
# Print the whole DataFrame as plain text; columns that do not fit the
# display width are wrapped onto a second block (marked with a trailing "\").
print(df)

   OrderID     Product     Category  Quantity    Price   OrderDate  \
0        1  Smartphone  Electronics         1   699.99  2023-05-14   
1        2      Laptop  Electronics         2  1299.99  2023-05-18   
2        3  Headphones  Electronics         5   199.99  2023-05-21   
3        4     Monitor  Electronics         3   499.99  2023-05-23   
4        5        Book      Fashion        10    39.99  2023-06-02   
5        6       Shoes      Fashion         4    89.99  2023-06-05   
6        7       Shirt      Fashion         6    29.99  2023-06-07   
7        8      Tablet  Electronics         3   399.99  2023-06-10   

            City    Country  
0       New York        USA  
1  San Francisco        USA  
2         London         UK  
3          Paris     France  
4         Berlin    Germany  
5         Sydney  Australia  
6       New York        USA  
7  San Francisco        USA  


In [28]:
# head(n) gives back the column headers together with the first n rows.

first_rows = df.head(n=5)
print(first_rows)

   OrderID     Product     Category  Quantity    Price   OrderDate  \
0        1  Smartphone  Electronics         1   699.99  2023-05-14   
1        2      Laptop  Electronics         2  1299.99  2023-05-18   
2        3  Headphones  Electronics         5   199.99  2023-05-21   
3        4     Monitor  Electronics         3   499.99  2023-05-23   
4        5        Book      Fashion        10    39.99  2023-06-02   

            City  Country  
0       New York      USA  
1  San Francisco      USA  
2         London       UK  
3          Paris   France  
4         Berlin  Germany  


In [29]:
# tail(n) gives back the column headers together with the last n rows.

last_rows = df.tail(n=5)
print(last_rows)

   OrderID  Product     Category  Quantity   Price   OrderDate           City  \
3        4  Monitor  Electronics         3  499.99  2023-05-23          Paris   
4        5     Book      Fashion        10   39.99  2023-06-02         Berlin   
5        6    Shoes      Fashion         4   89.99  2023-06-05         Sydney   
6        7    Shirt      Fashion         6   29.99  2023-06-07       New York   
7        8   Tablet  Electronics         3  399.99  2023-06-10  San Francisco   

     Country  
3     France  
4    Germany  
5  Australia  
6        USA  
7        USA  


# Creating a NumPy Array

In [30]:
import numpy as np

# Start from a plain Python list and wrap it in a one-dimensional ndarray.
elements = [1, 2, 3, 4, 5]
arr = np.array(elements)
print(arr)


[1 2 3 4 5]


# Access Array Elements

In [14]:
# Indexing is zero-based, so position 3 is the fourth element.
fourth = arr[3]
print(fourth)

4


In [16]:
# Pick the elements at positions 2 and 3 and add them together.
third, fourth = arr[2], arr[3]
print(third + fourth)

7


# NumPy Array Slicing

In [17]:
# Slice from position 1 up to (but not including) position 5.
middle_to_end = arr[1:5]
print(middle_to_end)

[2 3 4 5]


In [18]:
# Omitting the stop value slices from position 4 through the end of the array.
from_fourth_index = arr[4:]
print(from_fourth_index)

[5]


In [None]:
# Omitting the start value slices from the beginning up to (not including) position 4.
up_to_fourth_index = arr[:4]
print(up_to_fourth_index)

# Working with the CSV File

### Basic Data Exploration

In [31]:
# Check the shape of the dataset, reported as a (rows, columns) tuple
df.shape

(8, 8)

In [32]:
# Get a summary of the dataset: index range, column names, non-null counts,
# column dtypes, and approximate memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   OrderID    8 non-null      int64  
 1   Product    8 non-null      object 
 2   Category   8 non-null      object 
 3   Quantity   8 non-null      int64  
 4   Price      8 non-null      float64
 5   OrderDate  8 non-null      object 
 6   City       8 non-null      object 
 7   Country    8 non-null      object 
dtypes: float64(1), int64(2), object(5)
memory usage: 640.0+ bytes


In [33]:
# Count the missing (null/NaN) entries in every column
df.isna().sum()

OrderID      0
Product      0
Category     0
Quantity     0
Price        0
OrderDate    0
City         0
Country      0
dtype: int64

# Filtering Data

In [35]:
# Keep only the orders whose Category equals 'Electronics'
electronics_df = df.loc[df['Category'].eq('Electronics')]

# Preview the first rows of the filtered frame
electronics_df.head()

Unnamed: 0,OrderID,Product,Category,Quantity,Price,OrderDate,City,Country
0,1,Smartphone,Electronics,1,699.99,2023-05-14,New York,USA
1,2,Laptop,Electronics,2,1299.99,2023-05-18,San Francisco,USA
2,3,Headphones,Electronics,5,199.99,2023-05-21,London,UK
3,4,Monitor,Electronics,3,499.99,2023-05-23,Paris,France
7,8,Tablet,Electronics,3,399.99,2023-06-10,San Francisco,USA


# Applying NumPy Functions

In [37]:
import numpy as np

# Revenue per order: units sold multiplied by the unit price.
# This adds a 'TotalSales' column to df.
df['TotalSales'] = df['Quantity'].mul(df['Price'])

# Add up the per-order revenue with NumPy's sum function
total_sales = np.sum(df['TotalSales'])
print(f"Total sales amount: Rs{total_sales:.2f}")


Total sales amount: Rs7939.66


# Grouping Data

In [38]:
# Total units sold per product category: split rows by Category,
# then sum each group's Quantity values
category_sales = df.groupby(by='Category')['Quantity'].agg('sum')
print(category_sales)


Category
Electronics    14
Fashion        20
Name: Quantity, dtype: int64


# Analyzing Sales by Country

In [39]:
# Total revenue per country: split rows by Country,
# then sum each group's TotalSales values
country_sales = df.groupby(by='Country')['TotalSales'].agg('sum')

# Show the per-country totals
print(country_sales)


Country
Australia     359.96
France       1499.97
Germany       399.90
UK            999.95
USA          4679.88
Name: TotalSales, dtype: float64


# Data Cleaning

In [45]:
# Check for duplicates: count the rows that exactly repeat an earlier row.
# The count is printed so the check actually has a visible result.
duplicate = df.duplicated().sum()
print(f"Duplicate rows: {duplicate}")

# Drop any rows with missing values (if there were any).
# dropna() returns a new frame, so df itself is left unchanged.
df_clean = df.dropna()
print(f"Rows before cleaning: {len(df)}, after dropping missing values: {len(df_clean)}")
