In [1]:
# Understanding the Basic Concepts in Pandas and NumPy Libraries

# -----------------------------------------------------------------------
# Analyzing Trending Videos on YouTube
# Data Source : Kaggle - https://www.kaggle.com/datasnaek/youtube-new#INvideos.csv
# Learning Source : Geeks for Geeks - https://www.geeksforgeeks.org/python-data-analysis-using-pandas/
# TutorialsPoint - https://www.tutorialspoint.com/python_pandas/python_pandas_descriptive_statistics.htm
# -----------------------------------------------------------------------


In [2]:
# Importing Pandas and NumPy Libraries
import numpy as np
import pandas as pd

In [17]:
# About Pandas
# Pandas is one of the most widely used libraries in data analytics; its backend is written in C and Python.
# Its two core data structures are the Series and the DataFrame.

# A Series is a one-dimensional labelled array that can hold data of any type.
# It can be built from scalar values (int, float, string), a dictionary (key -> value) or an ndarray.

# Syntax : pandas.Series(data, index, dtype, copy)

# --- Series from a list of scalar values ---
# No index is passed, so pandas assigns the default integer index 0..n-1.
myData = [1, 2, 3, 4, 5]
mySeries = pd.Series(myData)
print("Series with Default index", mySeries, sep="\n")

# Same values, but labelled with a custom index (one label per value).
myIndex = ['a', 'b', 'c', 'd', 'e']
mySeries = pd.Series(myData, index=myIndex)
print("Series with Custom index", mySeries, sep="\n")

# --- Series from a dictionary ---
# The dictionary keys become the index labels and the values become the data.
myDictionary = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
mySeries = pd.Series(myDictionary)
print("Series with Dictionary", mySeries, sep="\n")

# --- Series from nested data ---
# NOTE: myArray is a nested Python list (not a NumPy ndarray), so every element of the
# resulting Series is itself a list and the Series dtype is `object`.
myArray = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
]
mySeries = pd.Series(myArray)
print("Series with nDarray", mySeries, sep="\n")

# A DataFrame is a two-dimensional structure with labelled rows and columns.
# Syntax : pandas.DataFrame(data, index, columns, dtype, copy)

# --- DataFrame from a flat list of scalars ---
# Produces a single column (labelled 0) with one row per value.
myDataFrame = pd.DataFrame(myData)
print("DataFrame with Scalar Value", myDataFrame, sep="\n")


# --- DataFrame from a dictionary of dictionaries ---
# Outer keys become column names, inner keys become row labels.
# Row labels missing from one of the inner dictionaries show up as NaN in that column.
d1 = {'a': 1, 'b': 2, 'c': 3}
d2 = {'d': 4, 'b': 5, 'c': 6}
myData = {'Column1': d1, 'Column2': d2}  # myData is rebound from a list to a dict here
myDataFrame = pd.DataFrame(myData)
print("DataFrame with Dictionaries", myDataFrame, sep="\n")

# --- DataFrame from a dictionary of Series ---
# The Series have different lengths; the shorter column is padded with NaN
# (which is why Column1 is displayed as float).
s1 = pd.Series([1, 2, 3])
s2 = pd.Series([5, 6, 7, 8])
myData = {'Column1': s1, 'Column2': s2}
myDataFrame = pd.DataFrame(myData)
print("DataFrame with Series", myDataFrame, sep="\n")


Series with Default index
0    1
1    2
2    3
3    4
4    5
dtype: int64
Series with Custom index
a    1
b    2
c    3
d    4
e    5
dtype: int64
Series with Dictionary
a    1
b    2
c    3
d    4
dtype: int64
Series with nDarray
0       [1, 2, 3]
1       [4, 5, 6]
2       [7, 8, 9]
3    [10, 11, 12]
dtype: object
DataFrame with Scalar Value
   0
0  1
1  2
2  3
3  4
4  5
DataFrame with Dictionaries
   Column1  Column2
a      1.0      NaN
b      2.0      5.0
c      3.0      6.0
d      NaN      4.0
DataFrame with Series
   Column1  Column2
0      1.0        5
1      2.0        6
2      3.0        7
3      NaN        8


In [28]:
# Basic Functionalities in Series

# Attributes / methods demonstrated below:
# 1. axes   - list containing the row axis (index) of the Series.
# 2. dtype  - data type of the stored values.
# 3. empty  - True when the Series holds no elements.
# 4. ndim   - number of dimensions of the underlying data (1 for a Series).
# 5. size   - number of elements in the underlying data.
# 6. values - the data as an ndarray.
# 7. head(n) - the first n rows.
# 8. tail(n) - the last n rows.

myData = [1, 2, 3, 4, 5, 6, 7]
mySeries = pd.Series(myData)

# Pair every label with the value it describes, then print them in order.
# print(label, value) inserts a single space between the two arguments.
series_report = [
    ("Axes : ", mySeries.axes),
    ("Datatype : ", mySeries.dtype),
    ("Empty ? ", mySeries.empty),
    ("Dimensions : ", mySeries.ndim),
    ("Size : ", mySeries.size),
    ("Values : ", mySeries.values),
    ("Head Elements : ", mySeries.head(4)),
    ("Tail Elements : ", mySeries.tail(2)),
]
for report_label, report_value in series_report:
    print(report_label, report_value)

Axes :  [RangeIndex(start=0, stop=7, step=1)]
Datatype :  int64
Empty ?  False
Dimensions :  1
Size :  7
Values :  [1 2 3 4 5 6 7]
Head Elements :  0    1
1    2
2    3
3    4
dtype: int64
Tail Elements :  5    6
6    7
dtype: int64


In [39]:
# Basic Functionalities in DataFrame

# Attributes / methods demonstrated below:
# 1. axes   - list holding the row axis (index) and the column axis.
# 2. dtypes - data type of each column (note: a Series uses `dtype` instead).
# 3. empty  - True when the DataFrame holds no elements.
# 4. ndim   - number of dimensions of the underlying data (2 for a DataFrame).
# 5. size   - total number of elements (rows x columns).
# 6. values - the data as an ndarray.
# 7. head() - the first n rows (first 5 when n is omitted).
# 8. tail() - the last n rows.
# 9. T      - transpose: rows become columns and columns become rows.
# 10. shape - (number of rows, number of columns).

# One Series per column; the dictionary keys become the column names.
myData = {
    'Name': pd.Series(['Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack']),
    'Age': pd.Series([25, 26, 25, 23, 30, 29, 23]),
    'Rating': pd.Series([4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8]),
}
myDataFrame = pd.DataFrame(myData)

print("DataFrame", myDataFrame, sep="\n")

# Pair every label with the value it describes, then print them in order.
# print(label, value) inserts a single space between the two arguments.
frame_report = [
    ("Axes : ", myDataFrame.axes),
    ("Datatype : ", myDataFrame.dtypes),
    ("Empty ?", myDataFrame.empty),
    ("Dimensions: ", myDataFrame.ndim),
    ("Size : ", myDataFrame.size),
    ("Values", myDataFrame.values),
    ("Head Elements : ", myDataFrame.head()),
    ("Tail Elements : ", myDataFrame.tail(3)),
    ("Shape : ", myDataFrame.shape),
]
for report_label, report_value in frame_report:
    print(report_label, report_value)

print("Transposed DataFrame", myDataFrame.T, sep="\n")


DataFrame
    Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24
2  Ricky   25    3.98
3    Vin   23    2.56
4  Steve   30    3.20
5  Smith   29    4.60
6   Jack   23    3.80
Axes :  [RangeIndex(start=0, stop=7, step=1), Index(['Name', 'Age', 'Rating'], dtype='object')]
Datatype :  Name       object
Age         int64
Rating    float64
dtype: object
Empty ? False
Dimensions:  2
Size :  21
Values [['Tom' 25 4.23]
 ['James' 26 3.24]
 ['Ricky' 25 3.98]
 ['Vin' 23 2.56]
 ['Steve' 30 3.2]
 ['Smith' 29 4.6]
 ['Jack' 23 3.8]]
Head Elements :      Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24
2  Ricky   25    3.98
3    Vin   23    2.56
4  Steve   30    3.20
Tail Elements :      Name  Age  Rating
4  Steve   30     3.2
5  Smith   29     4.6
6   Jack   23     3.8
Shape :  (7, 3)
Transposed DataFrame
           0      1      2     3      4      5     6
Name     Tom  James  Ricky   Vin  Steve  Smith  Jack
Age       25     26     25    23     30     29    23
Rating  4.23   