## Working with DataFrame objects

###### Pandas DataFrame 1

In [1]:
# Method of Creating DataFrames
# - Reading a .csv file into a DateFrame
# - From a Dictionary
#

In [2]:
import numpy as np
import pandas as pd

In [8]:
# Importing a .CSV file into a DataFrame
csv_df = pd.read_csv("C:/Users/parth/OneDrive/Desktop/PythonProjects/Free_Test_Data_200KB_CSV-1.csv")
csv_df

Unnamed: 0,SR.,NAME,GENDER,AGE,DATE,COUNTRY
0,1.0,Dett,Male,18.0,21/05/2015,Great Britain
1,2.0,Nern,Female,19.0,15/10/2017,France
2,3.0,Kallsie,Male,20.0,16/08/2016,France
3,4.0,Siuau,Female,21.0,21/05/2015,Great Britain
4,5.0,Shennice,Male,22.0,21/05/2016,France
...,...,...,...,...,...,...
15718,,,,,,
15719,,,,,,
15720,,,,,,
15721,,,,,,


In [4]:
# Importing a .xls file into a DataFrame
xls_df = pd.read_excel("C:/Users/parth/OneDrive/Desktop/PythonProjects/Sample-Spreadsheet-1-rows.xls")
xls_df

Unnamed: 0,SR.,NAME,GENDER,AGE,DATE,COUNTRY
0,1,Dett,Male,18,2015-05-21,Great Britain
1,2,Nern,Female,19,2017-10-15,France
2,3,Kallsie,Male,20,2016-08-16,France
3,4,Siuau,Female,21,2015-05-21,Great Britain
4,5,Shennice,Male,22,2016-05-21,France
...,...,...,...,...,...,...
2556,2557,Felisaas,Female,46,2022-05-21,Great Britain
2557,2558,Demetas,Female,47,2024-10-15,France
2558,2559,Jeromyw,Female,48,2023-08-16,Great Britain
2559,2560,Rashid,Female,49,2022-05-21,France


In [5]:
# Create a DataFrame from Dictionary
dict ={
    'Car_Brand':['Ford','Toyota','Renault','Tata','Mahindra'],
    'Avg_Yearly_Sale':[20000,22000,31000,27000,19000],
    'Best_Selling_Model':['Ecosport','Fortuner','Duster','Nexon','Scorpio']
}
car_df = pd.DataFrame(dict)
car_df

Unnamed: 0,Car_Brand,Avg_Yearly_Sale,Best_Selling_Model
0,Ford,20000,Ecosport
1,Toyota,22000,Fortuner
2,Renault,31000,Duster
3,Tata,27000,Nexon
4,Mahindra,19000,Scorpio


In [6]:
# Create a DataFrame from a Numpy Array of Arrays
nparr = np.array(
  [['Ford','Toyota','Renault','Tata','Mahindra'],
  [20000,22000,31000,27000,19000],
  ['Ecosport','Fortuner','Duster','Nexon','Scorpio']]
)
dictArr = {
    'Car_Brand':nparr[0],
    'Avg_Yearly_Sale':nparr[1],
    'Best_Selling_Model':nparr[2]
}
dfarr = pd.DataFrame(dictArr)
dfarr

Unnamed: 0,Car_Brand,Avg_Yearly_Sale,Best_Selling_Model
0,Ford,20000,Ecosport
1,Toyota,22000,Fortuner
2,Renault,31000,Duster
3,Tata,27000,Nexon
4,Mahindra,19000,Scorpio


In [7]:
# Create DataFrames list of Lists
list1 =[
    ['Ford',20000,'Ecosport'],
    ['Toyota',220000,'Fortuner'],
    ['Renault',310000,'Duster'],
    ['Tata',270000,'Nexon'],
    ['Mahindra',190000,'Scorpio']
]

dflist = pd.DataFrame(list1, columns = ['Car_Brand','Avg_Yearly_Sale','Best_Selling_Model'])
dflist

Unnamed: 0,Car_Brand,Avg_Yearly_Sale,Best_Selling_Model
0,Ford,20000,Ecosport
1,Toyota,220000,Fortuner
2,Renault,310000,Duster
3,Tata,270000,Nexon
4,Mahindra,190000,Scorpio


In [9]:
# Create DataFrame from a list of Series Objects(use Dictionary as intermediate)

series1 = pd.Series(['Ford','Toyota','Renault','Tata','Mahindra'])
series2 = pd.Series([20000,22000,31000,27000,19000])
series3 = pd.Series(['Ecosport','Fortuner','Duster','Nexon','Scorpio'])
dictionary_of_nparr = {'Name':series1, 'Age':series2,'Department':series3}
dfser = pd.DataFrame(dictionary_of_nparr)
dfser

Unnamed: 0,Name,Age,Department
0,Ford,20000,Ecosport
1,Toyota,22000,Fortuner
2,Renault,31000,Duster
3,Tata,27000,Nexon
4,Mahindra,19000,Scorpio


In [16]:
# Crating a DataFrame with Dates as Row Indexces
# Generate Range of Dates between 1st july 2023 to 31st july 2023

rng = pd.date_range("7/1/2023", periods = 31, freq="D")
print(rng)

# Generate two set of 31 random numbers
s1 = np.random.rand(31)
s2 = np.random.rand(31)

df1 = pd.DataFrame({'Col1':s1, 'Col2':s2})
df1.set_index(rng)

DatetimeIndex(['2023-07-01', '2023-07-02', '2023-07-03', '2023-07-04',
               '2023-07-05', '2023-07-06', '2023-07-07', '2023-07-08',
               '2023-07-09', '2023-07-10', '2023-07-11', '2023-07-12',
               '2023-07-13', '2023-07-14', '2023-07-15', '2023-07-16',
               '2023-07-17', '2023-07-18', '2023-07-19', '2023-07-20',
               '2023-07-21', '2023-07-22', '2023-07-23', '2023-07-24',
               '2023-07-25', '2023-07-26', '2023-07-27', '2023-07-28',
               '2023-07-29', '2023-07-30', '2023-07-31'],
              dtype='datetime64[ns]', freq='D')


Unnamed: 0,Col1,Col2
2023-07-01,0.989924,0.691443
2023-07-02,0.355141,0.325271
2023-07-03,0.729663,0.331958
2023-07-04,0.886255,0.973213
2023-07-05,0.345234,0.414635
2023-07-06,0.706537,0.681251
2023-07-07,0.503231,0.690037
2023-07-08,0.864033,0.525986
2023-07-09,0.267493,0.322098
2023-07-10,0.985664,0.935443


###### Pandas DataFrame 2

In [17]:
# Obtain statistical details about the Numeric Data in DataFrames
df1.describe()

Unnamed: 0,Col1,Col2
count,31.0,31.0
mean,0.537554,0.553634
std,0.294447,0.280114
min,0.010555,0.00239
25%,0.318748,0.367302
50%,0.539173,0.606041
75%,0.776982,0.728994
max,0.989924,0.984274


In [19]:
# Transpose the DataFrame

dft= df1.transpose()
dft

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
Col1,0.989924,0.355141,0.729663,0.886255,0.345234,0.706537,0.503231,0.864033,0.267493,0.985664,...,0.740961,0.294619,0.505619,0.539173,0.584855,0.322928,0.415277,0.845503,0.75439,0.178535
Col2,0.691443,0.325271,0.331958,0.973213,0.414635,0.681251,0.690037,0.525986,0.322098,0.935443,...,0.104951,0.873935,0.056131,0.699142,0.548676,0.19586,0.41042,0.606041,0.53269,0.665732


In [21]:
#Sort DataFrame on a Column
dfsort = dflist.sort_values(by='Avg_Yearly_Sale',ascending = False)
dfsort

Unnamed: 0,Car_Brand,Avg_Yearly_Sale,Best_Selling_Model
2,Renault,310000,Duster
3,Tata,270000,Nexon
1,Toyota,220000,Fortuner
4,Mahindra,190000,Scorpio
0,Ford,20000,Ecosport
