# Pandas is a powerful and widely used open-source data analysis and manipulation library in Python. It's particularly known for its ability to handle and process large amounts of data with ease. Here are some of the basic concepts and functionalities of Pandas:

**Series: A one-dimensional labeled array capable of holding any data type (integers, strings, floats, etc.).**

In [28]:
import pandas as pd
# A Series built from a plain list; no index is passed, so pandas labels
# the entries 0, 1, 2, ... by default.
values = [1, 2, 3, 4, 5]
s = pd.Series(data=values)
print(s)

0    1
1    2
2    3
3    4
4    5
dtype: int64


**DataFrame: A two-dimensional table with labeled axes (rows and columns). It's similar to a spreadsheet or SQL table.**

In [5]:
import pandas as pd
# Each dictionary key becomes a column label and each list supplies that
# column's values; the rows get a default 0..n-1 index.
columns = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data=columns)
print(df)

   A  B
0  1  4
1  2  5
2  3  6


**You can create a DataFrame from:**
- Dictionaries of lists (as in the example below), lists of dictionaries, or lists of lists
- CSV files
- Excel files
- SQL queries

In [49]:
import pandas as pd
# Column-oriented input: one key per column, one list of values per key.
data = dict(
    name=['Alice', 'Bob', 'Charlie'],
    age=[25, 30, 35],
)
df = pd.DataFrame(data)
print(df)

      name  age
0    Alice   25
1      Bob   30
2  Charlie   35


**Filtering data: select the rows where age is greater than 30**

In [16]:
import pandas as pd
data = {'name': ['Alice', 'Bob', 'Charlie'], 'age': [25, 30, 35]}
df = pd.DataFrame(data)

# Boolean mask: True for every row whose age is strictly greater than 30.
is_over_30 = df['age'] > 30

# Indexing with the mask keeps only the True rows. As the last expression
# of a notebook cell, the resulting DataFrame is what gets displayed.
df[is_over_30]

Unnamed: 0,name,age
2,Charlie,35


In [24]:
import pandas as pd
names = ['Alice', 'Bob', 'Charlie']
ages = [25, 30, 35]
data = {'name': names, 'age': ages}
df = pd.DataFrame(data)

# df.columns is an Index holding the column labels in order.
print(df.columns)


Index(['name', 'age'], dtype='object')


**Grouping by age and computing the mean of the numeric columns**

In [52]:
import pandas as pd

data = {
    'name': ['Alice', 'Bob', 'Charlie', 'Sathvik'],
    'age': [25, 30, 35, 25],
    'score': [80, 98, 76, 89],
}
df = pd.DataFrame(data)

# Rows that share an 'age' value fall into the same group
# (Alice and Sathvik are both 25).
by_age = df.groupby('age')

# numeric_only=True leaves the text column 'name' out of the average,
# so only 'score' remains in the result.
result = by_age.mean(numeric_only=True)
print(result)

     score
age       
25    84.5
30    98.0
35    76.0


**Getting the total sales for each company and year**

In [32]:
import pandas as pd

file_path = r'C:\Users\SESPL\bhargav-workspace\Day 31 - 33 data analysis\archive\Car Sales in India - (2019-2021).csv'

# Load the CSV into pandas.
data = pd.read_csv(file_path)
df = pd.DataFrame(data)

# Every column other than 'Company' and 'Year' is summed across each row;
# presumably these are the per-period sales figures (verify against the CSV).
sales_columns = df.drop(columns=['Company', 'Year'])
df['Total Sales'] = sales_columns.sum(axis=1)

# Show one line per (company, year) row with its total.
summary = df[['Company', 'Year', 'Total Sales']]
print(summary)

          Company  Year  Total Sales
0   Maruti Suzuki  2021      1364787
1         Hyundai  2021       505033
2            Tata  2021       330552
3        Mahindra  2021       203124
4             Kia  2021       180261
5           Honda  2021        89153
6          Toyota  2021       130799
7         Renault  2021        95878
8            Ford  2021        33480
9          Nissan  2021        39090
10             MG  2021        39858
11     Volkswagen  2021        26064
12          Skoda  2021        23857
13           Jeep  2021        11652
14  Maruti Suzuki  2020      1213388
15        Hyundai  2020       423642
16           Tata  2020       169880
17       Mahindra  2020       138873
18            Kia  2020       140505
19          Honda  2020        70533
20         Toyota  2020        76111
21        Renault  2020        80518
22           Ford  2020        45799
23         Nissan  2020         2377
24             MG  2020        28162
25     Volkswagen  2020        15598
2

**Getting the total sales per company for a single year (2019), sorted from lowest to highest**

In [34]:
import pandas as pd

file_path = r'C:\Users\SESPL\bhargav-workspace\Day 31 - 33 data analysis\archive\Car Sales in India - (2019-2021).csv'

# Reading the CSV file using pandas
data = pd.read_csv(file_path)

df = pd.DataFrame(data)

# The single year this report covers. Change this one value to report on a
# different year; every name and comment below stays accurate.
year = 2019

# Keep only that year's rows. .copy() gives an independent frame, so adding
# a column below does not write into a slice of df.
sales_for_year = df[df['Year'] == year].copy()

# Total sales per company: the row-wise sum of every column except the
# 'Company' and 'Year' identifier columns.
sales_for_year['Total Sales'] = sales_for_year.drop(['Company', 'Year'], axis=1).sum(axis=1)

# Sort by 'Total Sales' in ascending order (smallest seller first).
sales_for_year_sorted = sales_for_year.sort_values(by='Total Sales', ascending=True)

# Display the result
print(sales_for_year_sorted[['Company', 'Year', 'Total Sales']])

          Company  Year  Total Sales
37         Nissan  2019         6910
41           Jeep  2019        10935
40          Skoda  2019        15284
38             MG  2019        15930
39     Volkswagen  2019        32324
32            Kia  2019        45494
36           Ford  2019        73636
35        Renault  2019        88869
34         Toyota  2019       126701
33          Honda  2019       134738
30           Tata  2019       152002
31       Mahindra  2019       219682
29        Hyundai  2019       510260
28  Maruti Suzuki  2019      1464450


**Getting the name column and filtering the data by MoM (month-over-month) growth**

In [190]:
import pandas as pd

file_path = r'C:\Users\SESPL\bhargav-workspace\Day 31 - 33 data analysis\archive\car_sales_data_august_2024.csv'

# Load the data from the CSV file
data = pd.read_csv(file_path)

# read_csv already returns a DataFrame; this wraps it in a new DataFrame
# object bound to the name df.
df = pd.DataFrame(data)

# Print the column names and the 'name' column
print(df.columns)
print(df['name'])

print("------------------------------------------------------------")

# Keep the rows with positive MoM growth whose name is one of the specified brands
filtered_df = df[(df["MoM_Growth_(%)"] > 0) & (df["name"].isin(["Skoda", "Tata", "Maruti Suzuki"]))]

# Sort the filtered DataFrame by MoM_Growth_(%) in descending order
df_sorted = filtered_df.sort_values(by='MoM_Growth_(%)', ascending=False)

# Print the 'name' and 'MoM_Growth_(%)' columns from the sorted DataFrame
print(df_sorted[['name', "MoM_Growth_(%)"]])


Index(['RANK', 'name', 'YoY_Analysis_Aug'24', 'YoY_Analysis_Aug'23',
       'YoY_Growth_(%)', 'MoM_Analysis_Aug'24', 'MoM_Analysis_July'24',
       'MoM_Growth_(%)', 'Market_Share_Aug'24_(%)', 'Market_Share_Aug'23_(%)',
       'Market_Share_Dif_(%)'],
      dtype='object')
0     Maruti Suzuki
1           Hyundai
2              Tata
3          Mahindra
4            Toyota
5               Kia
6             Honda
7                MG
8                VW
9           Renault
10            Skoda
11           Nissan
12          Citroen
13             Jeep
Name: name, dtype: object
------------------------------------------------------------
             name  MoM_Growth_(%)
10          Skoda            31.8
0   Maruti Suzuki             4.1


**Finding the cameras that meet the given requirements (filters)**

In [36]:
import pandas as pd

file_path = r'C:\Users\SESPL\bhargav-workspace\Day 31 - 33 data analysis\archive\camera_dataset.csv'

data = pd.read_csv(file_path)
df = pd.DataFrame(data)

# One boolean mask per requirement; a row must satisfy all of them.
high_resolution = df['Max resolution'] > 1023
price_in_band = (df['Price'] < 750) & (df['Price'] > 550)  # strictly between 550 and 750
enough_storage = df['Storage included'] > 9

filtered_data = df[high_resolution & price_in_band & enough_storage]

# Only the model name and price are shown for the matching rows.
print(filtered_data[['Model', 'Price']])


                 Model  Price
1022  Toshiba PDR-4300    599


**Printing the filtered result without the index column**

In [46]:
import pandas as pd

file_path = r'C:\Users\SESPL\bhargav-workspace\Day 31 - 33 data analysis\archive\camera_dataset.csv'

data = pd.read_csv(file_path)
df = pd.DataFrame(data)

# Combine every condition into a single row mask:
# resolution above 1023, price strictly between 550 and 750, storage above 9.
price = df['Price']
matches = (
    df['Max resolution'].gt(1023)
    & price.lt(750)
    & price.gt(550)
    & df['Storage included'].gt(9)
)
filtered_data = df[matches]

# to_string(index=False) renders the table without the row-label column.
report = filtered_data[['Model', 'Price']]
print(report.to_string(index=False))


           Model  Price
Toshiba PDR-4300    599
