# NumPy Operations


In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build the one-dimensional integer array 1..5.
# Later NumPy cells in this notebook read `arr`, so run this cell first.
arr = np.array(range(1, 6))

print("Original Array:", arr)

Original Array: [1 2 3 4 5]


# Addition, Subtraction, Multiplication, and Division

In [3]:
# Arithmetic between an array and a scalar is applied element by element.
# The calls below are the function forms of the +, -, * and / operators.
# `result` is reassigned at every step, so after this cell it holds the
# division result.

# Addition
result = np.add(arr, 2)
print("Addition:", result)

# Subtraction
result = np.subtract(arr, 2)
print("Subtraction:", result)

# Multiplication
result = np.multiply(arr, 2)
print("Multiplication:", result)

# Division (true division: the result is floating point)
result = np.divide(arr, 2)
print("Division:", result)

Addition: [3 4 5 6 7]
Subtraction: [-1  0  1  2  3]
Multiplication: [ 2  4  6  8 10]
Division: [0.5 1.  1.5 2.  2.5]


# Indexing and Slicing

In [4]:
# Single elements: index 0 is the first item, index -1 counts from the end
first_item, last_item = arr[0], arr[-1]
print("First element:", first_item)
print("Last element:", last_item)

# Slices are half-open: [1:3] takes positions 1 and 2 and stops before 3
middle_pair = arr[1:3]
print("Slice [1:3]:", middle_pair)

First element: 1
Last element: 5
Slice [1:3]: [2 3]


# Reshaping and Transposing

In [5]:
# Reshape the flat array into a single column.
# Passing -1 lets NumPy infer the row count from the array's length, so this
# cell keeps working if `arr` is redefined with a different number of
# elements (a hard-coded row count would raise ValueError in that case).
arr_reshaped = arr.reshape(-1, 1)
print("Reshaped Array:", arr_reshaped)

# Transposing swaps the two axes: the (n, 1) column becomes a (1, n) row
arr_transposed = arr_reshaped.T
print("Transposed Array:", arr_transposed)

Reshaped Array: [[1]
 [2]
 [3]
 [4]
 [5]]
Transposed Array: [[1 2 3 4 5]]


# Statistics

In [6]:
# Summary statistics of `arr`, computed and printed in a fixed order.
# np.std and np.var are called with their defaults, which give the
# population (not sample) figures - hence a variance of 2.0 for 1..5.
for label, statistic in (
    ("Mean:", np.mean),
    ("Median:", np.median),
    ("Standard Deviation:", np.std),
    ("Variance:", np.var),
):
    print(label, statistic(arr))

Mean: 3.0
Median: 3.0
Standard Deviation: 1.4142135623730951
Variance: 2.0


# Arrays with Different Data Types

In [7]:
# One small array per element type; `dtype=` states the element type
# explicitly instead of leaving NumPy to infer it from the values.

# Integers
arr_int = np.array([1, 2, 3], dtype=int)
print("Int Array:", arr_int)

# Floating point
arr_float = np.array([1.0, 2.0, 3.0], dtype=float)
print("Float Array:", arr_float)

# Booleans
arr_bool = np.array([True, False, True], dtype=bool)
print("Bool Array:", arr_bool)

# Unicode strings
arr_str = np.array(["hello", "world", "numpy"], dtype=str)
print("Str Array:", arr_str)

Int Array: [1 2 3]
Float Array: [1. 2. 3.]
Bool Array: [ True False  True]
Str Array: ['hello' 'world' 'numpy']


# Concatenation

In [8]:
# Join the integer and float arrays end to end.
# The result has a single dtype, so the integer values come out as floats.
arr_concat = np.concatenate([arr_int, arr_float])
print("Concatenated Array:", arr_concat)

Concatenated Array: [1. 2. 3. 1. 2. 3.]


# Sorting

In [9]:
# Ascending order: np.sort returns a sorted copy and leaves `arr` unchanged
arr_sorted = np.sort(arr)
print("Sorted Array (Ascending):", arr_sorted)

# Descending order: read the ascending result backwards with a reversed slice.
# Note that `arr_sorted` now refers to the descending array.
arr_sorted = arr_sorted[::-1]
print("Sorted Array (Descending):", arr_sorted)

Sorted Array (Ascending): [1 2 3 4 5]
Sorted Array (Descending): [5 4 3 2 1]


# Pandas

In [10]:
# Load the retail dataset. The path is relative, so the CSV file must sit in
# the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="retail supermarket.csv")

In [12]:
# Preview the first five rows (five is also head()'s default row count)
df.head()

Unnamed: 0,Ship Mode,Segment,Country,City,State,Postal Code,Region,Category,Sub-Category,Sales,Quantity,Discount,Profit
0,Second Class,Consumer,United States,Henderson,Kentucky,42420,South,Furniture,Bookcases,261.96,2,0.0,41.9136
1,Second Class,Consumer,United States,Henderson,Kentucky,42420,South,Furniture,Chairs,731.94,3,0.0,219.582
2,Second Class,Corporate,United States,Los Angeles,California,90036,West,Office Supplies,Labels,14.62,2,0.0,6.8714
3,Standard Class,Consumer,United States,Fort Lauderdale,Florida,33311,South,Furniture,Tables,957.5775,5,0.45,-383.031
4,Standard Class,Consumer,United States,Fort Lauderdale,Florida,33311,South,Office Supplies,Storage,22.368,2,0.2,2.5164


In [14]:
# Explore the structure of the DataFrame: shape is a (rows, columns) pair
n_rows, n_cols = df.shape
print("Shape of the DataFrame:", (n_rows, n_cols))

Shape of the DataFrame: (9994, 13)


In [15]:
# Show the dtype pandas assigned to each column.
# sep="" stops print() from inserting its default space between the heading
# (which ends in a newline) and the Series; that space would indent the first
# row and break the column alignment.
print("\nData Types:\n", df.dtypes, sep="")


Data Types:
 Ship Mode        object
Segment          object
Country          object
City             object
State            object
Postal Code       int64
Region           object
Category         object
Sub-Category     object
Sales           float64
Quantity          int64
Discount        float64
Profit          float64
dtype: object


In [16]:
# Count the missing (null) entries in each column.
# sep="" stops print() from inserting its default space after the heading's
# trailing newline, which would otherwise indent the first row of the result.
print("\nMissing Values:\n", df.isnull().sum(), sep="")


Missing Values:
 Ship Mode       0
Segment         0
Country         0
City            0
State           0
Postal Code     0
Region          0
Category        0
Sub-Category    0
Sales           0
Quantity        0
Discount        0
Profit          0
dtype: int64


In [17]:
# Descriptive statistics for the numerical columns (describe()'s default).
# sep="" stops print() from inserting its default space after the heading's
# trailing newline, which would shift the header row of the table by one
# character relative to the rows below it.
print("\nDescriptive Statistics (Numerical Columns):\n", df.describe(), sep="")


Descriptive Statistics (Numerical Columns):
         Postal Code         Sales     Quantity     Discount       Profit
count   9994.000000   9994.000000  9994.000000  9994.000000  9994.000000
mean   55190.379428    229.858001     3.789574     0.156203    28.656896
std    32063.693350    623.245101     2.225110     0.206452   234.260108
min     1040.000000      0.444000     1.000000     0.000000 -6599.978000
25%    23223.000000     17.280000     2.000000     0.000000     1.728750
50%    56430.500000     54.490000     3.000000     0.200000     8.666500
75%    90008.000000    209.940000     5.000000     0.200000    29.364000
max    99301.000000  22638.480000    14.000000     0.800000  8399.976000


In [18]:
# Descriptive statistics for every column, categorical ones included.
# Statistics that do not apply to a column's type are reported as NaN.
# sep="" stops print() from inserting its default space after the heading's
# trailing newline, which would shift the first line of the table by one
# character.
print("\nDescriptive Statistics (All Columns):\n", df.describe(include='all'), sep="")


Descriptive Statistics (All Columns):
              Ship Mode   Segment        Country           City       State  \
count             9994      9994           9994           9994        9994   
unique               4         3              1            531          49   
top     Standard Class  Consumer  United States  New York City  California   
freq              5968      5191           9994            915        2001   
mean               NaN       NaN            NaN            NaN         NaN   
std                NaN       NaN            NaN            NaN         NaN   
min                NaN       NaN            NaN            NaN         NaN   
25%                NaN       NaN            NaN            NaN         NaN   
50%                NaN       NaN            NaN            NaN         NaN   
75%                NaN       NaN            NaN            NaN         NaN   
max                NaN       NaN            NaN            NaN         NaN   

         Postal Code Re

In [19]:
# Select a subset of columns by passing a list of column names:
# here 'City', 'State', 'Sales' and 'Profit'.
selected_columns = df[['City', 'State', 'Sales', 'Profit']]

# sep="" stops print() from inserting its default space after the heading's
# trailing newline, which would shift the header row of the preview by one
# character.
print("\nSelected Columns:\n", selected_columns.head(), sep="")


Selected Columns:
               City       State     Sales    Profit
0        Henderson    Kentucky  261.9600   41.9136
1        Henderson    Kentucky  731.9400  219.5820
2      Los Angeles  California   14.6200    6.8714
3  Fort Lauderdale     Florida  957.5775 -383.0310
4  Fort Lauderdale     Florida   22.3680    2.5164


In [21]:
# Keep only the rows whose Sales value is greater than 1000, using a boolean
# mask; the original row labels are preserved in the result.
filtered_rows = df[df['Sales'] > 1000]

# sep="" stops print() from inserting its default space after the heading's
# trailing newline, which would shift the first line of the preview by one
# character.
print("\nRows where Sales > 1000:\n", filtered_rows.head(), sep="")


Rows where Sales > 1000:
          Ship Mode    Segment        Country           City         State  \
10  Standard Class   Consumer  United States    Los Angeles    California   
24  Standard Class   Consumer  United States           Orem          Utah   
27  Standard Class   Consumer  United States   Philadelphia  Pennsylvania   
35     First Class  Corporate  United States     Richardson         Texas   
54  Standard Class  Corporate  United States  New York City      New York   

    Postal Code   Region    Category Sub-Category     Sales  Quantity  \
10        90032     West   Furniture       Tables  1706.184         9   
24        84057     West   Furniture       Tables  1044.630         3   
27        19140     East   Furniture    Bookcases  3083.430         7   
35        75080  Central  Technology       Phones  1097.544         7   
54        10024     East  Technology       Phones  1029.950         5   

    Discount     Profit  
10       0.2    85.3092  
24       0.0   240.

In [22]:
# List the distinct values that appear in the 'Category' column
unique_categories = df['Category'].unique()

# sep="" stops print() from inserting its default space after the heading's
# trailing newline, which would otherwise prefix the array with a stray space.
print("\nUnique Categories:\n", unique_categories, sep="")



Unique Categories:
 ['Furniture' 'Office Supplies' 'Technology']
