##  What's Pandas?
###  Package for storing, managing, and manipulating data in Python. 
###  The two most important pandas objects are `Series` and `DataFrame`.

#### A `Series` is a one-dimensional `sequence of data`, like a 1-D `array` in NumPy.
#### A Series has a single data type.
#### A Series differs from a NumPy array in how it is indexed.

#### A `DataFrame` is multiple Series of common `length`, sharing a common `index`, bound together in a single `tabular` object.

In [5]:
import numpy as np
import pandas as pd

In [10]:
# Structured dtype describing one row of iris.csv: four float16 measurements
# followed by a unicode string of at most 16 characters for the variety.
# NOTE: float16 is a 2-byte float ('<f2'), so the measurements are stored with
# reduced precision (5.1 later shows up as 5.101562 when put into a DataFrame).
irisCustomType = np.dtype([('sepal.length', np.float16), ('sepal.width', np.float16),('petal.length', np.float16),('petal.width', np.float16),('variety', '<U16')])

In [11]:
irisCustomType

dtype([('sepal.length', '<f2'), ('sepal.width', '<f2'), ('petal.length', '<f2'), ('petal.width', '<f2'), ('variety', '<U16')])

In [13]:
# Load the CSV into a structured NumPy array: skiprows=1 skips the first line
# of the file (the header row) and fields are split on ','.
# NOTE: the variety strings keep their literal double quotes, e.g. '"Setosa"'.
loaded_iris_csv_data=np.loadtxt('iris.csv', skiprows=1, dtype=irisCustomType, delimiter=',')

In [17]:
# loaded_iris_csv_data
loaded_iris_csv_data[:5]

array([(5.1, 3.5, 1.4, 0.2, '"Setosa"'), (4.9, 3. , 1.4, 0.2, '"Setosa"'),
       (4.7, 3.2, 1.3, 0.2, '"Setosa"'), (4.6, 3.1, 1.5, 0.2, '"Setosa"'),
       (5. , 3.6, 1.4, 0.2, '"Setosa"')],
      dtype=[('sepal.length', '<f2'), ('sepal.width', '<f2'), ('petal.length', '<f2'), ('petal.width', '<f2'), ('variety', '<U16')])

In [23]:
loaded_iris_csv_data[:5][0]

(5.1, 3.5, 1.4, 0.2, '"Setosa"')

In [18]:
loaded_iris_csv_data[:5]['variety']

array(['"Setosa"', '"Setosa"', '"Setosa"', '"Setosa"', '"Setosa"'],
      dtype='<U16')

In [19]:
# Select just one column -> returns a 1-D array holding the values of that column
loaded_iris_csv_data[:5]['petal.length']

array([1.4, 1.4, 1.3, 1.5, 1.4], dtype=float16)

In [20]:
loaded_iris_csv_data[:5]['petal.length'][0]

1.4

In [21]:
loaded_iris_csv_data[:5]['petal.length'][:2]

array([1.4, 1.4], dtype=float16)

In [22]:
loaded_iris_csv_data[:5]['petal.length'][:3]

array([1.4, 1.4, 1.3], dtype=float16)

In [24]:
# Select two columns -> returns a structured array whose elements are tuples holding the values of the two selected columns
loaded_iris_csv_data[:5][['petal.length', 'petal.width']]

array([(1.4, 0.2), (1.4, 0.2), (1.3, 0.2), (1.5, 0.2), (1.4, 0.2)],
      dtype={'names':['petal.length','petal.width'], 'formats':['<f2','<f2'], 'offsets':[4,6], 'itemsize':72})

In [25]:
loaded_iris_csv_data[:5][['petal.length', 'petal.width']][0]

(1.4, 0.2)

In [26]:
# Explore by index
loaded_iris_csv_data[:5][['petal.length', 'petal.width']][0][0]

1.4

In [28]:
# Explore by column's unique name
loaded_iris_csv_data[:5][['petal.length', 'petal.width']][0]['petal.length']

1.4

### IN Pandas

In [29]:
# The pandas counterpart is the read_csv function, which parses the CSV file correctly

In [31]:
loaded_iris_csv_data_pnd=pd.read_csv('iris.csv')

In [32]:
loaded_iris_csv_data_pnd

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


In [33]:
type(loaded_iris_csv_data_pnd)

pandas.core.frame.DataFrame

In [34]:
# Return the first five rows -> head method of a pandas DataFrame

In [35]:
loaded_iris_csv_data_pnd.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [41]:
# Attribute-style access does not work for this column: the name contains a dot,
# so Python looks up an attribute named `sepal` on the DataFrame and fails
# (see the AttributeError below). Use bracket access with the full name instead.
loaded_iris_csv_data_pnd.head().sepal.length

AttributeError: 'DataFrame' object has no attribute 'sepal'

In [39]:
loaded_iris_csv_data_pnd.head()['sepal.length']

0    5.1
1    4.9
2    4.7
3    4.6
4    5.0
Name: sepal.length, dtype: float64

In [40]:
type(loaded_iris_csv_data_pnd.head()['sepal.length'])

pandas.core.series.Series

In [44]:
loaded_iris_csv_data_pnd.head()['sepal.width']

0    3.5
1    3.0
2    3.2
3    3.1
4    3.6
Name: sepal.width, dtype: float64

In [45]:
# Comma-separated labels inside [] form a single tuple, which is looked up as
# one key and is not found (see the KeyError below).
loaded_iris_csv_data_pnd.head()['sepal.length', 'sepal.width']

KeyError: ('sepal.length', 'sepal.width')

In [46]:
loaded_iris_csv_data_pnd.head().loc(:, ['sepal.length', 'sepal.width'])  # .loc must be indexed with [...], not called with (...): a bare `:` is not valid inside a call

SyntaxError: invalid syntax (Temp/ipykernel_1124/2513589310.py, line 1)

#### Pandas is built on top of Numpy

In [47]:
loaded_iris_csv_data_pnd.head().loc[:, ['sepal.length', 'sepal.width']]

Unnamed: 0,sepal.length,sepal.width
0,5.1,3.5
1,4.9,3.0
2,4.7,3.2
3,4.6,3.1
4,5.0,3.6


In [48]:
loaded_iris_csv_data_pnd.head().loc

<pandas.core.indexing._LocIndexer at 0x298a4dbcbd0>

In [51]:
print(loaded_iris_csv_data_pnd.head().loc)

<pandas.core.indexing._LocIndexer object at 0x00000298A4DBCF40>


In [52]:
loaded_iris_csv_data_pnd.head().loc[]

SyntaxError: invalid syntax (Temp/ipykernel_1124/2052196018.py, line 1)

In [53]:
loaded_iris_csv_data_pnd.head().loc[:]

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [54]:
loaded_iris_csv_data_pnd.head().loc[:, ['variety']]

Unnamed: 0,variety
0,Setosa
1,Setosa
2,Setosa
3,Setosa
4,Setosa


In [55]:
loaded_iris_csv_data_pnd.head().loc[:, ['variety','petal.length']]

Unnamed: 0,variety,petal.length
0,Setosa,1.4
1,Setosa,1.4
2,Setosa,1.3
3,Setosa,1.5
4,Setosa,1.4


In [56]:
loaded_iris_csv_data_pnd.head().loc[:, ['petal.length','variety']]

Unnamed: 0,petal.length,variety
0,1.4,Setosa
1,1.4,Setosa
2,1.3,Setosa
3,1.5,Setosa
4,1.4,Setosa


#### Numpy Object Can be Used to Construct Pandas DataFrame

In [57]:
# loaded_iris_csv_data > Numpy Object
type(loaded_iris_csv_data)

numpy.ndarray

In [58]:
loaded_iris_csv_data_numpy_dataFrame=pd.DataFrame(loaded_iris_csv_data)
loaded_iris_csv_data_numpy_dataFrame

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.101562,3.500000,1.400391,0.199951,"""Setosa"""
1,4.898438,3.000000,1.400391,0.199951,"""Setosa"""
2,4.699219,3.199219,1.299805,0.199951,"""Setosa"""
3,4.601562,3.099609,1.500000,0.199951,"""Setosa"""
4,5.000000,3.599609,1.400391,0.199951,"""Setosa"""
...,...,...,...,...,...
145,6.699219,3.000000,5.199219,2.300781,"""Virginica"""
146,6.300781,2.500000,5.000000,1.900391,"""Virginica"""
147,6.500000,3.000000,5.199219,2.000000,"""Virginica"""
148,6.199219,3.400391,5.398438,2.300781,"""Virginica"""


In [43]:
loaded_iris_csv_data_pnd.values

array([[5.1, 3.5, 1.4, 0.2, 'Setosa'],
       [4.9, 3.0, 1.4, 0.2, 'Setosa'],
       [4.7, 3.2, 1.3, 0.2, 'Setosa'],
       [4.6, 3.1, 1.5, 0.2, 'Setosa'],
       [5.0, 3.6, 1.4, 0.2, 'Setosa'],
       [5.4, 3.9, 1.7, 0.4, 'Setosa'],
       [4.6, 3.4, 1.4, 0.3, 'Setosa'],
       [5.0, 3.4, 1.5, 0.2, 'Setosa'],
       [4.4, 2.9, 1.4, 0.2, 'Setosa'],
       [4.9, 3.1, 1.5, 0.1, 'Setosa'],
       [5.4, 3.7, 1.5, 0.2, 'Setosa'],
       [4.8, 3.4, 1.6, 0.2, 'Setosa'],
       [4.8, 3.0, 1.4, 0.1, 'Setosa'],
       [4.3, 3.0, 1.1, 0.1, 'Setosa'],
       [5.8, 4.0, 1.2, 0.2, 'Setosa'],
       [5.7, 4.4, 1.5, 0.4, 'Setosa'],
       [5.4, 3.9, 1.3, 0.4, 'Setosa'],
       [5.1, 3.5, 1.4, 0.3, 'Setosa'],
       [5.7, 3.8, 1.7, 0.3, 'Setosa'],
       [5.1, 3.8, 1.5, 0.3, 'Setosa'],
       [5.4, 3.4, 1.7, 0.2, 'Setosa'],
       [5.1, 3.7, 1.5, 0.4, 'Setosa'],
       [4.6, 3.6, 1.0, 0.2, 'Setosa'],
       [5.1, 3.3, 1.7, 0.5, 'Setosa'],
       [4.8, 3.4, 1.9, 0.2, 'Setosa'],
       [5.0, 3.0, 1.6, 0.

### Creating Series

In [60]:
from pandas import DataFrame, Series

In [61]:
ser1=Series([1,2,3,4]) # Single Type

In [62]:
ser1

0    1
1    2
2    3
3    4
dtype: int64

In [65]:
ser2=Series(['a','b','c','d'])

In [66]:
ser2

0    a
1    b
2    c
3    d
dtype: object

In [None]:
# Notice: an index was automatically assigned to both of these series.

In [67]:
idx=pd.Index(["New York", "Los Angleles", "Chicago", "Houston"])
print(inx)

NameError: name 'inx' is not defined

In [68]:
print(idx)

Index(['New York', 'Los Angleles', 'Chicago', 'Houston'], dtype='object')


In [70]:
# Populations
pops=Series([8550, 3972, 2721, np.nan], index=idx, name="Population")
print(pops)

New York        8550.0
Los Angleles    3972.0
Chicago         2721.0
Houston            NaN
Name: Population, dtype: float64


#### Create Series Using Dict

In [72]:
state=Series({"New York":"New York", "Los Angeles": "California", "San Antonio": "Texas"}, name="States")
print(state)

New York         New York
Los Angeles    California
San Antonio         Texas
Name: States, dtype: object


In [73]:
state['New York']

'New York'

In [74]:
state['Los Angeles']

'California'

### Creating DataFrames

In [76]:
# Series are used to build pandas DataFrames.
# A pandas DataFrame can be thought of as several Series combined to form a tabular object.

In [81]:
mat=np.arange(0, 9).reshape(3,3)
mat

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [83]:
mat.ndim

2

In [84]:
print(DataFrame(mat))

   0  1  2
0  0  1  2
1  3  4  5
2  6  7  8


In [85]:
print(DataFrame(mat, index=['a','b','c'], columns=['alpha', 'beta',''gamaa]))

SyntaxError: invalid syntax. Perhaps you forgot a comma? (Temp/ipykernel_1124/3451200721.py, line 1)

In [86]:
print(DataFrame(mat, index=['a','b','c'], columns=['alpha', 'beta','gamaa']))

   alpha  beta  gamaa
a      0     1      2
b      3     4      5
c      6     7      8


#### Create DataFrame from a list of tuples

#### Create DataFrame from a dict

In [87]:
print(DataFrame({"Name": ['ahmed', 'basem', 'noah'], "Gender": [12,27,950]})) # Same Length

    Name  Gender
0  ahmed      12
1  basem      27
2   noah     950


In [88]:
print(DataFrame({"Name": ['ahmed', 'basem', 'noah'], "Gender": [12,27]})) # Different lengths (3 names vs 2 values) -> raises ValueError

ValueError: All arrays must be of the same length

In [None]:
# the function does not know how to assign missing information.

In [89]:
names=Series(['ahmed', 'mohamed', 'ali'], name="Name")

In [None]:
genders=Series([12, 25], name="Gender")

In [91]:
print(DataFrame({"Name": names, "Gender": genders}))

NameError: name 'genders' is not defined

In [93]:
print(DataFrame({"Name": names, "Gender": genders})) # Different lengths: building from Series works, and the missing Gender entry becomes NaN.

      Name  Gender
0    ahmed    12.0
1  mohamed    25.0
2      ali     NaN


In [94]:
pops

New York        8550.0
Los Angleles    3972.0
Chicago         2721.0
Houston            NaN
Name: Population, dtype: float64

In [95]:
state

New York         New York
Los Angeles    California
San Antonio         Texas
Name: States, dtype: object

In [96]:
# names and genders carry the default integer index, while pops is indexed by
# city name, so no labels line up: the result contains every label from all
# three Series and is padded with NaN wherever a Series has no value.
print(DataFrame({"Name": names, "Gender": genders, "Home": pops}))

                 Name  Gender    Home
0               ahmed    12.0     NaN
1             mohamed    25.0     NaN
2                 ali     NaN     NaN
Chicago           NaN     NaN  2721.0
Houston           NaN     NaN     NaN
Los Angleles      NaN     NaN  3972.0
New York          NaN     NaN  8550.0


In [None]:
print(DataFrame([]))

## Resources
iris.csv: https://gist.github.com/netj/8836201