In [1]:
import pandas as pd

In [2]:
pd.__version__

'1.4.4'

# The Pandas Series Object


In [3]:
 data = pd.Series([0.25, 0.5, 0.75, 1.0])
 data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [4]:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [5]:
data.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
data[1:3]

1    0.50
2    0.75
dtype: float64

In [7]:
# Series as generalized NumPy array:

In [8]:
 data = pd.Series([0.25, 0.5, 0.75, 1.0],
 index=['a', 'b', 'c', 'd'])
 data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [9]:
data['b']

0.5

In [10]:
 data = pd.Series([0.25, 0.5, 0.75, 1.0],
 index=[2, 5, 3, 7])
 data

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [11]:
data[5]


0.5

In [12]:
# Series as specialized dictionary:

In [13]:
 population_dict = {'California': 38332521,
 'Texas': 26448193,
 'New York': 19651127,
 'Florida': 19552860,
 'Illinois': 12882135}
 population = pd.Series(population_dict)
 population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [14]:
 population['California':'Illinois']


California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [15]:
pd.Series([2, 4, 6])

0    2
1    4
2    6
dtype: int64

In [16]:
pd.Series(5, index=[100, 200, 300])

100    5
200    5
300    5
dtype: int64

In [17]:
pd.Series({2:'a', 1:'b', 3:'c'})

2    a
1    b
3    c
dtype: object

# The Pandas DataFrame Object:


In [18]:
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
 'Florida': 170312, 'Illinois': 149995}

In [19]:
area = pd.Series(area_dict)
area


California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [20]:
 states = pd.DataFrame({'population': population,
 'area': area})
 states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [21]:
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [22]:
states.columns

Index(['population', 'area'], dtype='object')

# DataFrame as specialized dictionary


In [23]:
states['area']


California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [24]:
# From a single Series object:

In [25]:
pd.DataFrame(population, columns=['population'])


Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [26]:
 # From a list of dicts
 data = [{'a': i, 'b': 2 * i}
 for i in range(3)]
 pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [27]:
pd.DataFrame([{'a': 1, 'b': 2}, {'b': 3, 'c': 4}])


Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [28]:
pd.DataFrame({'population': population,
 'area': area})


Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [29]:
 import numpy as np
 pd.DataFrame(np.random.rand(3, 2),
 columns=['foo', 'bar'],
 index=['a', 'b', 'c'])


Unnamed: 0,foo,bar
a,0.885245,0.122875
b,0.981446,0.774793
c,0.726096,0.834102


In [30]:
 A = np.zeros(3, dtype=[('A', 'i8'), ('B', 'f8')])
 A
    

array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])

In [31]:
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


In [32]:
 ind = pd.Index([2, 3, 5, 7, 11])
 ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

In [33]:
#Index as immutable array

ind[1] = 0

TypeError: Index does not support mutable operations

In [None]:
#Index as ordered set
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])

In [None]:
indA & indB

In [None]:
indA | indB

In [None]:
indA ^ indB

# Data Indexing and Selection:

In [None]:
 data = pd.Series([0.25, 0.5, 0.75, 1.0],
 index=['a', 'b', 'c', 'd'])
 data

# Ufuncs: Index Preservation:



In [36]:
rng = np.random.RandomState(42)
ser = pd.Series(rng.randint(0, 10, 4))
df = pd.DataFrame(rng.randint(0, 10, (3, 4)),
columns=['A', 'B', 'C', 'D'])
np.sin(df * np.pi / 4),np.exp(ser)

(          A             B         C             D
 0 -1.000000  7.071068e-01  1.000000 -1.000000e+00
 1 -0.707107  1.224647e-16  0.707107 -7.071068e-01
 2 -0.707107  1.000000e+00 -0.707107  1.224647e-16,
 0     403.428793
 1      20.085537
 2    1096.633158
 3      54.598150
 dtype: float64)

In [37]:
# UFuncs: Index Alignment:

In [38]:
# Index alignment in Series:

In [39]:
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
 'California': 423967}, name='area')
population = pd.Series({'California': 38332521, 'Texas': 26448193,
 'New York': 19651127}, name='population')


In [40]:
population / area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [41]:
 A = pd.Series([2, 4, 6], index=[0, 1, 2])
 B = pd.Series([1, 3, 5], index=[1, 2, 3])
 A + B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [42]:
A.add(B, fill_value=0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [43]:
#Index alignment in DataFrame:


In [44]:
 A = pd.DataFrame(rng.randint(0, 20, (2, 2)),
 columns=list('AB'))
 A

Unnamed: 0,A,B
0,1,11
1,5,1


In [45]:
 B = pd.DataFrame(rng.randint(0, 10, (3, 3)),
 columns=list('BAC'))
 B

Unnamed: 0,B,A,C
0,4,0,9
1,5,8,0
2,9,2,6


In [46]:
 A + B

Unnamed: 0,A,B,C
0,1.0,15.0,
1,13.0,6.0,
2,,,


In [47]:
 fill = A.stack().mean()
 A.add(B, fill_value=fill)


Unnamed: 0,A,B,C
0,1.0,15.0,13.5
1,13.0,6.0,4.5
2,6.5,13.5,10.5


In [48]:
'''Python operator Pandas method(s)
+ add()
- sub(), subtract()
* mul(), multiply()
/ truediv(), div(), divide()
// floordiv()
% mod()
** pow()
'''

'Python operator Pandas method(s)\n+ add()\n- sub(), subtract()\n* mul(), multiply()\n/ truediv(), div(), divide()\n// floordiv()\n% mod()\n** pow()\n'

In [49]:
# Ufuncs: Operations Between DataFrame and Series:

In [50]:
 A = rng.randint(10, size=(3, 4))
 A

array([[3, 8, 2, 4],
       [2, 6, 4, 8],
       [6, 1, 3, 8]])

In [51]:
A - A[0]

array([[ 0,  0,  0,  0],
       [-1, -2,  2,  4],
       [ 3, -7,  1,  4]])

In [54]:
 df = pd.DataFrame(A, columns=list('QRST'))
 df - df.iloc[0]


Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-1,-2,2,4
2,3,-7,1,4


In [73]:
!git init
!git add .
!git commit -m "first commit"
!git branch -M main

Reinitialized existing Git repository in C:/Users/hamdi/OneDrive/Documents/Untitled Folder/Data science book/.git/


The file will have its original line endings in your working directory
The file will have its original line endings in your working directory
The file will have its original line endings in your working directory
The file will have its original line endings in your working directory


[main ee82675] first commit
 4 files changed, 3975 insertions(+), 52 deletions(-)
 create mode 100644 .ipynb_checkpoints/chapter 3 part 2-checkpoint.ipynb
 create mode 100644 chapter 3 part 2.ipynb


In [74]:
!git remote add origin https://github.com/a-hamdi/Data_science_hand_book



error: remote origin already exists.


In [75]:
!git push -u origin main

To https://github.com/a-hamdi/Data_science_hand_book
 ! [rejected]        main -> main (non-fast-forward)
error: failed to push some refs to 'https://github.com/a-hamdi/Data_science_hand_book'
hint: Updates were rejected because the tip of your current branch is behind
hint: its remote counterpart. Integrate the remote changes (e.g.
hint: 'git pull ...') before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.


In [83]:
!git pull origin main

From https://github.com/a-hamdi/Data_science_hand_book
 * branch            main       -> FETCH_HEAD
fatal: refusing to merge unrelated histories


In [82]:
!git pull main

fatal: 'main' does not appear to be a git repository
fatal: Could not read from remote repository.

Please make sure you have the correct access rights
and the repository exists.


In [81]:
!git fetch

In [85]:
!git checkout -b branche2

Switched to a new branch 'branche2'


In [86]:
!git merge origin/main

fatal: refusing to merge unrelated histories


In [87]:
!git merge --allow-unrelated-histories

fatal: No remote for the current branch.
