In [1]:
import numpy as np

# 📌 Basic Array Attributes
Let's create a NumPy array and explore its properties.

In [2]:
arr = np.array([[1, 2, 3], [4, 5, 6]])  # A 2D NumPy array
print(type(arr))  # Output: <class 'numpy.ndarray'>
print(arr.ndim)  # Output: 2
print(arr.shape)  # Output: (2, 3)
print(arr.size)  # Output: 6
print(arr.dtype)  # Output: int64 (or int32 based on system)


<class 'numpy.ndarray'>
2
(2, 3)
6
int64


# 📌 Array Operations
Let's create two NumPy arrays:

In [3]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[5, 6, 7], [8, 9, 0]])


# Addition of scalar 
print(arr1 + 3)

# element-wise addition of two arrays 
print(arr1 + arr2)

# subtraction,multiplication, division
print(arr1 - arr2)
print(arr1 * arr2)
print(arr1 / arr2) 




[[4 5 6]
 [7 8 9]]
[[ 6  8 10]
 [12 14  6]]
[[-4 -4 -4]
 [-4 -4  6]]
[[ 5 12 21]
 [32 45  0]]
[[0.2        0.33333333 0.42857143]
 [0.5        0.55555556        inf]]


  print(arr1 / arr2)


# 📌 Statistical Methods
NumPy provides powerful statistical functions:

In [4]:
print(np.mean(arr, axis=0))  # Mean of each column
print(np.median(arr, axis=1))  # Median of each row
print(np.std(arr))
print(np.min(arr))  # Smallest value
print(np.max(arr))  # Largest value
print(np.var(arr)) # variance 
print(np.std(arr)) # standard deviation 
print(np.percentile(arr, 25)) # 25th percentile

[2.5 3.5 4.5]
[2. 5.]
1.707825127659933
1
6
2.9166666666666665
1.707825127659933
2.25


Array Properties

<table border="1">
  <tr>
    <th>Feature</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>arr.ndim</code></td>
    <td>Number of dimensions</td>
  </tr>
  <tr>
    <td><code>arr.shape</code></td>
    <td>Shape of the array (rows, columns)</td>
  </tr>
  <tr>
    <td><code>arr.size</code></td>
    <td>Total number of elements</td>
  </tr>
  <tr>
    <td><code>arr.dtype</code></td>
    <td>Data type of elements</td>
  </tr>
</table>

Basic Operations

<table border="1">
  <tr>
    <th>Operation</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>arr + 3</code></td>
    <td>Adds 3 to each element</td>
  </tr>
  <tr>
    <td><code>arr1 + arr2</code></td>
    <td>Element-wise addition</td>
  </tr>
  <tr>
    <td><code>arr1 - arr2</code></td>
    <td>Element-wise subtraction</td>
  </tr>
  <tr>
    <td><code>arr1 * arr2</code></td>
    <td>Element-wise multiplication</td>
  </tr>
  <tr>
    <td><code>arr1 / arr2</code></td>
    <td>Element-wise division</td>
  </tr>
  <tr>
    <td><code>arr1 ** 2</code></td>
    <td>Element-wise exponentiation</td>
  </tr>
</table>

Statistical Functions

<table border="1">
  <tr>
    <th>Function</th>
    <th>Description</th>
  </tr>
  <tr>
    <td><code>np.mean(arr, axis=0)</code></td>
    <td>Mean of each column</td>
  </tr>
  <tr>
    <td><code>np.median(arr, axis=1)</code></td>
    <td>Median of each row</td>
  </tr>
  <tr>
    <td><code>np.std(arr)</code></td>
    <td>Standard deviation</td>
  </tr>
  <tr>
    <td><code>np.min(arr), np.max(arr)</code></td>
    <td>Minimum and maximum values</td>
  </tr>
  <tr>
    <td><code>np.var(arr)</code></td>
    <td>Variance</td>
  </tr>
  <tr>
    <td><code>np.percentile(arr, 25)</code></td>
    <td>25th percentile</td>
  </tr>
</table>



# Pandas 

 <table>
        <tr>
            <th>Function</th>
            <th>Description</th>
        </tr>
        <tr>
            <td><code>pd.DataFrame()</code></td>
            <td>Creates a DataFrame from a dictionary.</td>
        </tr>
        <tr>
            <td><code>df.loc[]</code></td>
            <td>Accesses rows using labels (index).</td>
        </tr>
        <tr>
            <td><code>df.iloc[]</code></td>
            <td>Accesses rows using integer position.</td>
        </tr>
        <tr>
            <td><code>df.info()</code></td>
            <td>Provides a summary of the DataFrame including data types and non-null values.</td>
        </tr>
        <tr>
            <td><code>df.describe()</code></td>
            <td>Generates summary statistics for numerical columns.</td>
        </tr>
        <tr>
            <td><code>df.dtypes</code></td>
            <td>Displays the data types of each column.</td>
        </tr>
        <tr>
            <td><code>pd.concat()</code></td>
            <td>Concatenates multiple DataFrames.</td>
        </tr>
        <tr>
            <td><code>df.isna().any()</code></td>
            <td>Checks if there are any missing values in the DataFrame.</td>
        </tr>
        <tr>
            <td><code>df.notna().all()</code></td>
            <td>Checks if all values are non-null.</td>
        </tr>
        <tr>
            <td><code>df.isna()</code> / <code>df.isnull()</code></td>
            <td>Returns a DataFrame indicating where values are missing.</td>
        </tr>
        <tr>
            <td><code>df.to_csv()</code></td>
            <td>Saves the DataFrame to a CSV file.</td>
        </tr>
        <tr>
            <td><code>pd.read_csv()</code></td>
            <td>Reads a CSV file into a DataFrame.</td>
        </tr>
        <tr>
            <td><code>df.drop()</code></td>
            <td>Drops specified columns or rows from the DataFrame.</td>
        </tr>
        <tr>
            <td><code>df.sum()</code></td>
            <td>Computes the sum of DataFrame columns or rows.</td>
        </tr>
        <tr>
            <td><code>pd.Series()</code></td>
            <td>Creates a Pandas Series (one-dimensional array).</td>
        </tr>
        <tr>
            <td>DataFrame from Series</td>
            <td>Creates a DataFrame by combining multiple Series.</td>
        </tr>
    </table>

In [13]:
import pandas as pd  # Importing pandas library

In [15]:
# Creating a DataFrame using a dictionary
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Frank'],
    'Age': [25, 30, 35, 40, 45, 50],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'Philadelphia'],
    'Salary': [50000, 60000, 70000, 80000, 90000, 100000]
}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City,Salary
0,Alice,25,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000
4,Eve,45,Phoenix,90000
5,Frank,50,Philadelphia,100000


In [16]:
# Displaying the DataFrame
print("DataFrame:")
print(df)

DataFrame:
      Name  Age          City  Salary
0    Alice   25      New York   50000
1      Bob   30   Los Angeles   60000
2  Charlie   35       Chicago   70000
3    David   40       Houston   80000
4      Eve   45       Phoenix   90000
5    Frank   50  Philadelphia  100000


In [17]:
# Displaying the DataFrame
print("DataFrame:")
print(df)

# Using .loc to access rows by labels (index)
print("\nUsing loc:")
print(df.loc[2])  # Fetching the 3rd row

# Using .iloc to access rows by integer position
print("\nUsing iloc:")
print(df.iloc[2])  # Fetching the 3rd row

# Getting info about the DataFrame
print("\nDataFrame Info:")
df.info()

# Getting descriptive statistics
print("\nDescriptive Statistics:")
print(df.describe())

# Checking data types of each column
print("\nData Types:")
print(df.dtypes)


DataFrame:
      Name  Age          City  Salary
0    Alice   25      New York   50000
1      Bob   30   Los Angeles   60000
2  Charlie   35       Chicago   70000
3    David   40       Houston   80000
4      Eve   45       Phoenix   90000
5    Frank   50  Philadelphia  100000

Using loc:
Name      Charlie
Age            35
City      Chicago
Salary      70000
Name: 2, dtype: object

Using iloc:
Name      Charlie
Age            35
City      Chicago
Salary      70000
Name: 2, dtype: object

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    6 non-null      object
 1   Age     6 non-null      int64 
 2   City    6 non-null      object
 3   Salary  6 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 324.0+ bytes

Descriptive Statistics:
             Age         Salary
count   6.000000       6.000000
mean   37.500000   75000.000000

In [18]:
# Creating another DataFrame
data2 = {
    'Name': ['Grace', 'Hank', 'Ian', 'Jack'],
    'Age': [28, 33, 38, 43],
    'City': ['San Diego', 'Austin', 'Denver', 'Seattle'],
    'Salary': [55000, 65000, 75000, 85000]
}
df2 = pd.DataFrame(data2)
df2

Unnamed: 0,Name,Age,City,Salary
0,Grace,28,San Diego,55000
1,Hank,33,Austin,65000
2,Ian,38,Denver,75000
3,Jack,43,Seattle,85000


In [19]:
# Concatenating two DataFrames
concatenated_df = pd.concat([df, df2], ignore_index=True)
print("\nConcatenated DataFrame:")
print(concatenated_df)


Concatenated DataFrame:
      Name  Age          City  Salary
0    Alice   25      New York   50000
1      Bob   30   Los Angeles   60000
2  Charlie   35       Chicago   70000
3    David   40       Houston   80000
4      Eve   45       Phoenix   90000
5    Frank   50  Philadelphia  100000
6    Grace   28     San Diego   55000
7     Hank   33        Austin   65000
8      Ian   38        Denver   75000
9     Jack   43       Seattle   85000


In [20]:
# Checking for any missing values using .any()
print("\nAny missing values:")
print(concatenated_df.isna().any())


Any missing values:
Name      False
Age       False
City      False
Salary    False
dtype: bool


In [21]:
# Checking if all values are non-null using .all()
print("\nAll non-null values:")
print(concatenated_df.notna().all())



All non-null values:
Name      True
Age       True
City      True
Salary    True
dtype: bool


In [22]:

# Checking for missing values using .isna() and .isnull()
print("\nChecking for missing values with isna():")
print(concatenated_df.isna())
print("\nChecking for missing values with isnull():")
print(concatenated_df.isnull())


Checking for missing values with isna():
    Name    Age   City  Salary
0  False  False  False   False
1  False  False  False   False
2  False  False  False   False
3  False  False  False   False
4  False  False  False   False
5  False  False  False   False
6  False  False  False   False
7  False  False  False   False
8  False  False  False   False
9  False  False  False   False

Checking for missing values with isnull():
    Name    Age   City  Salary
0  False  False  False   False
1  False  False  False   False
2  False  False  False   False
3  False  False  False   False
4  False  False  False   False
5  False  False  False   False
6  False  False  False   False
7  False  False  False   False
8  False  False  False   False
9  False  False  False   False


In [23]:
# Saving DataFrame to CSV
concatenated_df.to_csv("output.csv", index=False)
print("\nDataFrame saved to output.csv")


DataFrame saved to output.csv


In [24]:
# Reading DataFrame from CSV
read_df = pd.read_csv("output.csv")
print("\nDataFrame read from output.csv:")
print(read_df)


DataFrame read from output.csv:
      Name  Age          City  Salary
0    Alice   25      New York   50000
1      Bob   30   Los Angeles   60000
2  Charlie   35       Chicago   70000
3    David   40       Houston   80000
4      Eve   45       Phoenix   90000
5    Frank   50  Philadelphia  100000
6    Grace   28     San Diego   55000
7     Hank   33        Austin   65000
8      Ian   38        Denver   75000
9     Jack   43       Seattle   85000


In [25]:
# Dropping a column
read_df = read_df.drop(columns=['City'])
print("\nDataFrame after dropping 'City' column:")
print(read_df)


DataFrame after dropping 'City' column:
      Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000
3    David   40   80000
4      Eve   45   90000
5    Frank   50  100000
6    Grace   28   55000
7     Hank   33   65000
8      Ian   38   75000
9     Jack   43   85000


In [26]:
# Summing up the values of the Age column
print("\nSum of Age column:")
print(read_df['Age'].sum())


Sum of Age column:
367


In [27]:
# Creating Series
data_series1 = pd.Series([1, 2, 3, 4, 5])
data_series2 = pd.Series([10, 20, 30, 40, 50])

In [28]:
# Creating DataFrame by concatenating two Series
df_from_series = pd.DataFrame({'Column1': data_series1, 'Column2': data_series2})
print("\nDataFrame created from two Series:")
print(df_from_series)


DataFrame created from two Series:
   Column1  Column2
0        1       10
1        2       20
2        3       30
3        4       40
4        5       50
