# NumPy (Numerical Python) is a fundamental library for numerical computing in Python. It provides support for arrays, matrices, and many mathematical functions that operate on these data structures. Here's a basic guide to getting started with NumPy:

**creating an array and printing**

In [4]:
import numpy as np
# Build a one-dimensional array from a Python list of four integers,
# then display it.
arr = np.array([1, 2, 3, 4])
print(arr)

[1 2 3 4]


**creating a 2x3 array of zeros**

In [6]:
# Allocate a 2-row, 3-column array in which every element is 0.0.
zeros = np.zeros(shape=(2, 3))
print(zeros)

[[0. 0. 0.]
 [0. 0. 0.]]


**creating a 3x3 matrix of ones**

In [8]:
# Allocate a 3-by-3 array in which every element is 1.0.
ones = np.ones(shape=(3, 3))
print(ones)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


**creating an identity matrix**

In [10]:
# 2x2 identity matrix: ones on the main diagonal, zeros elsewhere.
eye = np.eye(N=2)
print(eye)

[[1. 0.]
 [0. 1.]]


**creating a random matrix**

In [12]:
# 3x3 array of random floats drawn uniformly from the half-open
# interval [0, 1); the values differ on every run.
rand = np.random.rand(3, 3)
print(rand)

[[0.26237733 0.0687091  0.38627562]
 [0.85945532 0.20787087 0.40621004]
 [0.2309756  0.26116406 0.38490848]]


 ***NumPy Analysis of Injury Data***
-  First import the required libraries
  -  using only NumPy (plus Python's built-in csv module)

In [6]:
import numpy as np
import csv

# Location of the injury dataset. A raw string keeps the backslashes in the
# Windows path from being interpreted as escape sequences.
file_path = r'C:\Users\SESPL\bhargav-workspace\Day 31 - 32 data analysis\archive\injury_data.csv'

# Read every row of the CSV into a list of lists of strings.
# newline='' leaves line-ending handling to the csv module, which its
# documentation recommends so that newlines embedded in quoted fields are
# parsed correctly.
with open(file_path, 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    data = list(reader)

# Preview the header row plus the first four data rows.
print(data[:5])

[['Player_Age', 'Player_Weight', 'Player_Height', 'Previous_Injuries', 'Training_Intensity', 'Recovery_Time', 'Likelihood_of_Injury'], ['24', '66.25193286255299', '175.73242883117646', '1', '0.4579289944340279', '5', '0'], ['37', '70.99627126832448', '174.58165012331358', '0', '0.2265216260361057', '6', '1'], ['32', '80.09378116336106', '186.32961751509828', '0', '0.6139703063252326', '2', '1'], ['28', '87.47327055231725', '175.50423961774717', '1', '0.2528581182501112', '4', '1']]


**to get the min, max and average (mean) of Player_Age, and the max value of each column**

In [8]:
# Parse the numeric body of the CSV with np.genfromtxt; skip_header=1 drops
# the header row so only the data rows are converted.
data = np.genfromtxt(file_path, delimiter=",", skip_header=1)

# Read just the first row, as strings, to recover the column names.
column_names = np.genfromtxt(file_path, delimiter=",", max_rows=1, dtype=str)

# Confirm that the loader returned a NumPy array.
print(type(data))  # expected: <class 'numpy.ndarray'>

# "Player_Age" is the first column (index 0).
player_age = data[:, 0].astype(float)

# Minimum, maximum and mean (average) of the "Player_Age" column.
min_value = np.min(player_age)
max_value = np.max(player_age)
mean_value = np.mean(player_age)
print(f"Minimum Value of Player_Age is {min_value}")
print(f"Maximum Value of Player_Age is {max_value}")
print(f"Mean Value of Player_Age is {mean_value}")

# Report the maximum of every column next to its name. The maxima are
# computed in a single reduction down the rows (axis=0) rather than one
# column at a time.
num_cols = data.shape[1]
column_maxima = np.max(data, axis=0)

for i, max_val in enumerate(column_maxima):
    print(f"Column: {column_names[i]}, Max: {max_val}")


<class 'numpy.ndarray'>
Minimum Value of Player_Age is 18.0
Maximum Value of Player_Age is 39.0
Mean Value of Player_Age is 28.231
Column: Player_Age, Max: 39.0
Column: Player_Weight, Max: 104.65010351720721
Column: Player_Height, Max: 207.3086723322357
Column: Previous_Injuries, Max: 1.0
Column: Training_Intensity, Max: 0.9977493890102006
Column: Recovery_Time, Max: 6.0
Column: Likelihood_of_Injury, Max: 1.0


**to retrieve the data of the 10 oldest players**

In [36]:
# Ages are read from the first column of the data array (index 0); change
# the index if "Player_Age" is stored in a different column.
player_age = data[:, 0]

# argsort returns row indices in ascending age order; flipping that result
# ranks the rows from the highest age to the lowest.
sorted_indices = np.flip(np.argsort(player_age))

# Keep the full rows belonging to the ten highest ages.
top_10_aged = data[sorted_indices[:10]]

# Show the heading and the selected rows, one after the other.
print("Top 10 Most Aged People:", top_10_aged, sep="\n")


Top 10 Most Aged People:
[[3.90000000e+01 7.12660073e+01 1.83815386e+02 1.00000000e+00
  1.02842592e-01 6.00000000e+00 0.00000000e+00]
 [3.90000000e+01 7.95992903e+01 1.91939563e+02 1.00000000e+00
  8.12473821e-01 4.00000000e+00 0.00000000e+00]
 [3.90000000e+01 8.23212361e+01 1.82918618e+02 1.00000000e+00
  4.02027217e-01 6.00000000e+00 0.00000000e+00]
 [3.90000000e+01 6.79214732e+01 1.87247239e+02 1.00000000e+00
  3.31616849e-01 4.00000000e+00 1.00000000e+00]
 [3.90000000e+01 5.90086351e+01 1.73572427e+02 0.00000000e+00
  2.69161571e-01 5.00000000e+00 0.00000000e+00]
 [3.90000000e+01 8.81400947e+01 1.74271279e+02 0.00000000e+00
  3.54831856e-01 5.00000000e+00 0.00000000e+00]
 [3.90000000e+01 6.56264432e+01 1.68786194e+02 0.00000000e+00
  6.03528807e-01 3.00000000e+00 1.00000000e+00]
 [3.90000000e+01 6.81911272e+01 1.89577277e+02 0.00000000e+00
  1.61457834e-01 5.00000000e+00 0.00000000e+00]
 [3.90000000e+01 6.05014343e+01 1.69921861e+02 1.00000000e+00
  5.95366370e-01 4.00000000e+00 1

**print shape size and dtype of data**

In [10]:
# Show the array's dimensions, its total element count and its element
# type, one per line.
print(data.shape, data.size, data.dtype, sep="\n")

(1000, 7)
7000
float64


**boolean comparison: which values in the row at index 1 are greater than 5**

In [40]:
# Compare every value in the row at index 1 against 5; the result is a
# boolean array with True wherever the value is greater than 5.
row = data[1]
print(row > 5)

[ True  True  True False False  True False]


**getting player_height and finding max of first five player_height**

In [26]:
# "Player_Height" is the third column (index 2) of the data array.
player_height = data[:, 2].astype(float)

# Restrict the view to the first five players.
first_five_heights = player_height[:5]
print(first_five_heights)

# np.max is used instead of the builtin max: the builtin compares elements
# one by one, so its result depends on element order when a NaN is present
# (e.g. from a missing cell), whereas np.max consistently propagates NaN.
print(f"maximum height of players for first 5 people: {np.max(first_five_heights)} cms")


[175.73242883 174.58165012 186.32961752 175.50423962 190.17501229]
maximum height of players for first 5 people: 190.1750122908418 cms


**get on best car**

In [15]:
import numpy 

file_path = r'C:\Users\SESPL\bhargav-workspace\Day 31 - 32 data analysis\archive\Car Sales in India - (2019-2021).csv'

with open(file_path, 'r') as a:
    reader = csv.reader(a)
    data = list(reader)
    #print(data)
    