In [None]:
# pandas is an open-source Python library designed to make working with relational or labeled data easy and intuitive.
# It provides data structures and operations for manipulating numerical data and time series.
# The library is built on top of NumPy.

# Why Use Pandas?
# Fast and efficient for manipulating and analyzing data.
# Data from different file objects can be easily loaded.
# Flexible reshaping and pivoting of data sets
# Provides time-series functionality.

import pandas as pd
import numpy as np

# A pandas Series is a one-dimensional labeled array that can hold data of any type (integers, strings, floats, Python objects, etc.).
# The axis labels are collectively called the index.

# Five standard-normal samples labelled 'a'..'e'. The samples come from a
# seeded generator so that restarting the kernel and re-running the notebook
# prints the same Series every time.
s = pd.Series(np.random.default_rng(42).standard_normal(5), index=['a', 'b', 'c', 'd', 'e'])
print(s)
print(type(s))

In [None]:
# The index holds the labels that were passed when the Series was built.
s.index

In [None]:
# No index is given, so pandas uses the default integer index 0..4.
# The values come from a seeded generator so the printed output is
# reproducible across kernel restarts.
t = pd.Series(np.random.default_rng(7).standard_normal(5))
print(t)

In [None]:
# Default index created because no labels were supplied for t.
t.index

In [None]:
# A dict maps directly onto a Series: keys become the index labels and
# values become the data.
d = dict(zip((1, 2, 3), ("a", "b", "c")))
pd.Series(d)

In [None]:
# An explicit index selects entries from the dict by key; labels 4 and 5 are
# not keys of d, so their values are NaN.
pd.Series(d,index=[1,2,3,4,5])

In [None]:
# Values 1..4 labelled 'a'..'d' (the second argument of Series is the index).
s = pd.Series(list(range(1, 5)), index=list('abcd'))
print(s)

In [None]:
# Same values without labels: the index defaults to 0..3.
s = pd.Series(list(range(1, 5)))
print(s)

In [None]:
# A bare scalar yields a one-element Series with the default index.
pd.Series(999)

In [None]:
# With an index, the scalar is repeated for every label.
pd.Series(999,['a','b','c','d'])

In [None]:
# list('abcd') builds ['a', 'b', 'c', 'd'], so the scalar 999 is again
# repeated once per label.
pd.Series(999,list('abcd'))

In [None]:
# Element access and assignment on a label-indexed Series.
# Use [] (or .loc) for labels and .iloc for positions. Plain integer keys on a
# non-integer index are deprecated as positional lookups since pandas 2.1 and
# are treated as labels in newer releases, so s[0] / s[2] = ... must not be
# used here.
# The data comes from a seeded generator so the output is reproducible.
s = pd.Series(np.random.default_rng(42).standard_normal(5), index=['a', 'b', 'c', 'd', 'e'])
print(s)
print(s.iloc[0])   # first element, by position
print(s['b'])      # element labelled 'b'
s['c'] = 999       # assign by label
print(s)
s.iloc[2] = 111    # assign by position; position 2 is label 'c'
s

In [None]:
# Series.get looks a label up and returns a default instead of raising
# when the label is missing.
print(s.get('b'))
print(s.iloc[1])   # positional lookup; integer keys in s[...] are deprecated on a non-integer index
print(s.get('f'))  # missing label, no default given -> None
s.get('f','no f to be found')

In [None]:
# Show the whole Series, then its first three elements by position.
print(s)
print(s.iloc[:3])

In [None]:
# NumPy functions apply element-wise to a Series.
s = pd.Series(list(range(1, 5)))
np.sqrt(s)

In [None]:
# Element-wise square of the current Series s.
np.square(s)

In [None]:
# Values 1..9 labelled 'a'..'i'.
s = pd.Series(list(range(1, 10)), index=list('abcdefghi'))
print(s)

In [None]:
# A pandas DataFrame is a two-dimensional, size-mutable table whose columns
# may hold different types.

# Calling the constructor with no arguments gives an empty frame.
df = pd.DataFrame()
print(df)

# A flat list of strings ...
lst = [
    'Geeks', 'For', 'Geeks',
    'is', 'portal', 'for', 'Geeks',
]

# ... becomes a single column with one row per list item.
df = pd.DataFrame(data=lst)
print(df)

In [None]:
# Column-oriented input: each key is a column name and each list holds that
# column's values.
data = dict(
    calories=[420, 380, 390],
    duration=[50, 40, 45],
)

# Load the dict into a DataFrame; rows get the default integer index.
df = pd.DataFrame(data=data)

print(df)

In [None]:


# Columns supplied as Series are aligned on their index labels. 'one' has no
# entry for label 'd', so that cell is NaN in the resulting frame.
d = {
    'one': pd.Series([1., 2., 3.], index=list('abc')),
    'two': pd.Series([1., 2., 3., 4.], index=list('abcd')),
}

print(d)
pd.DataFrame(d)

In [None]:
# An explicit index keeps only rows 'a', 'b' and 'c'; the 'd' entry of
# column 'two' is left out.
pd.DataFrame(d,index=['a','b','c'])


In [None]:
# When every dict value is a scalar, pandas needs an explicit index to know
# how many rows to build; here it is a single row labelled 'a'.
d = dict(name="wxw", age=20, grade=100)
pd.DataFrame(d, index=['a'])

In [None]:
# A list of dicts is read row by row: one dict per row, with the keys used
# as column names.
list1 = [{"name": "wxw", 'age': 20, 'grade': 100} for _ in range(2)]
pd.DataFrame(list1, index=[0, 1])

In [None]:
# A 2-D NumPy array maps onto a frame row by row; columns= names the four
# columns.
array = np.array([
    [1, 2, 3, 4],
    [4, 5, 6, 7],
])
print(array)
pd.DataFrame(data=array, columns=list('abcd'))


In [None]:
# Same array, now with explicit row labels 'aa' and 'bb' as well as column names.
pd.DataFrame(array,columns=list('abcd'),index=['aa','bb'])

In [None]:
# Numbers 1..12 arranged as 3 rows x 4 columns.
array = np.arange(1,13).reshape(3,4)
# A bare expression is displayed only when it is the last line of a cell, so
# print the array explicitly to show it before the DataFrame.
print(array)
pd.DataFrame(array,columns=list('abcd'))

In [None]:
# Lists nested three levels deep. NOTE(review): pandas presumably reads the
# outer two levels as rows and columns and keeps the innermost lists as cell
# values - confirm against the rendered output.
a = [
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
]
pd.DataFrame(data=a)

In [None]:
# Six-row frame used by the following cells: 'one' is 1..6, 'two' is 2..7.
df = pd.DataFrame({
    'one': list(range(1, 7)),
    'two': list(range(2, 8)),
})
df

In [None]:
# Selecting a single column by name returns it as a Series.
df['one']

In [None]:
# Boolean mask: keep only the rows whose 'one' value is greater than 3.
df[df['one']>3]

In [None]:
# New column 'three' is the row-wise sum of 'one' and 'two'.
df['three'] = df['one'].add(df['two'])
print(df)

In [None]:
# 'four' flags the rows where 'one' exceeds 2.
df['four'] = df['one'].gt(2)
# Assigning a scalar fills every row of 'five' with the same value.
df['five'] = 9999
df

In [None]:
# Assigning a Series aligns on index labels: rows 1, 2 and 3 receive
# 'b', 'c' and 'd'; rows of df with no matching label get NaN.
df['six'] = pd.Series(['b','c', 'd'], index=[1,2,3])
df

In [None]:
# Insert a new column at position 2 (i.e. as the third column), one letter per row.
# Gotcha: insert() raises ValueError if 'insert_col' already exists, so this
# cell fails when re-run unless df is rebuilt first.
df.insert(2,'insert_col',list('zyxwvu'))
df

In [None]:
# Filter the rows where 'one' > 3; every column currently in df is shown.
df[df['one']>3]

In [None]:
# del removes the column from df in place.
# Gotcha: re-running this cell raises KeyError because 'two' is already gone.
del df['two']
df

In [None]:
# Upload a workbook through the Colab file picker and load it twice:
# once indexed by its first column and once with the default integer index.
import pandas as pd
from google.colab import files

uploaded = files.upload() # load Data.xlsx from local drive
if not uploaded:
    # Cancelling the picker returns an empty dict; fail with a clear message
    # instead of an IndexError on the lookup below.
    raise ValueError("No file was uploaded; re-run this cell and select Data.xlsx.")
filename = next(iter(uploaded))  # name of the first uploaded file

# Pass the path rather than an open() handle so pandas opens and closes the
# workbook itself and no file handle is left unclosed.
# index_col=0 tells pandas to use the first column (i.e. Student Name) as the index.
data = pd.read_excel(filename, index_col=0)
data1 = pd.read_excel(filename)  # use the default index

print(data)
print(data.index)

print(data1)
print(data1.index)



In [None]:

# --- Column access ---
print(data['Lab'])
# Values of the Assignment column only, printed without the index.
print(data1['Assignment'].to_string(index=False))

# --- Row access ---
print(data.loc['Mike'])   # by index label
print(data.iloc[3])       # by position
print(data.iloc[:2])      # first two rows, by position

# Overall mark: Lab weighted 0.4, Assignment 0.3, Final Exam 0.3.
data["Mark"] = (
    data["Lab"].mul(0.4)
    .add(data["Assignment"].mul(0.3))
    .add(data["Final Exam"].mul(0.3))
)

# Attendance rate: lecture attendance weighted 2/3 and lab attendance 1/3,
# each divided by 13 (presumably the number of sessions - confirm).
data["Attendance Rate"] = (
    data["Lecture Attendance"].div(13).mul(2/3)
    .add(data["Lab Attendance"].div(13).mul(1/3))
)


data

In [None]:
# Check the data type of every column, then make sure the attendance rate
# is stored as float.
print(data.dtypes)
convert_dict = {'Attendance Rate': float}
data = data.astype(convert_dict)

print(data.dtypes)

# Letter grade from the mark bands (lower bound inclusive, upper bound exclusive).
data.loc[data["Mark"] >= 75, "Grade"] = 'A'
data.loc[(data["Mark"] >= 60) & (data["Mark"] < 75), "Grade"] = 'B'
data.loc[(data["Mark"] >= 45) & (data["Mark"] < 60), "Grade"] = 'C'
data.loc[(data["Mark"] >= 40) & (data["Mark"] < 45), "Grade"] = 'D'
data.loc[data["Mark"] < 40, "Grade"] = 'F'
# An attendance rate below 0.8 overrides the mark-based grade, so this
# assignment must stay last.
data.loc[data["Attendance Rate"] < 0.8, "Grade"] = 'F'
print(data)

# Order by grade (ascending, A first), then by mark (highest first) within
# each grade. sort_values returns a new frame; rebinding the name avoids
# inplace=True and keeps the cell chainable.
data = data.sort_values(by=["Grade", "Mark"], ascending=[True, False])
print(data)

data

In [None]:
# Write the graded table to the 'Mark Sheet' sheet of Results.xlsx; the
# writer is closed (and the file saved) when the with-block exits.
with pd.ExcelWriter('Results.xlsx') as writer:
    data.to_excel(writer, sheet_name='Mark Sheet')

# Send the saved workbook to the browser as a download.
files.download('Results.xlsx')