### NumPy

In [1]:
import numpy as np
np.__version__

'2.2.2'

#### Python listesinden NumPy array'i oluşturma

In [6]:
# Build a 1-D integer ndarray from a plain Python list.
np.array([1,4,2,5,3])

array([1, 4, 2, 5, 3])

#### NumPy array'inin tipini (dtype) baştan belirleyebiliriz

In [7]:
# The dtype keyword fixes the element type at creation; the integers are stored as float32.
np.array([1,4,2,5,3], dtype='float32')

array([1., 4., 2., 5., 3.], dtype=float32)

#### NumPy ile multidimensional array tanımlama

In [8]:
# Each range(i, i+3) becomes one row, so the three ranges produce a 3x3 array.
np.array([range(i,i+3) for i in [2,4,6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

NOTE: Nested listeler otomatik olarak multidimensional olur

In [9]:
# 3x5 array with every element set to 3.14.
np.full((3,5),3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [11]:
# 3x5 array filled with ones (floats, as the output shows).
np.ones((3,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [17]:
# Small 1-D array; the last line displays it.
x = np.array([1,2,3])
x

array([1, 2, 3])

In [22]:
# np.newaxis inserts a second axis, turning the 1-D array into a single column (3 rows x 1 column).
x[: ,np.newaxis]

array([[1],
       [2],
       [3]])

In [29]:
np.random.seed(0)  # seed the legacy global RNG so the sample below is reproducible


def compute_reciprocals(values):
    """Return the reciprocal of every element of ``values``, one at a time.

    Written as an explicit Python loop on purpose: this is the slow baseline
    that gets timed elsewhere in the notebook.

    Parameters
    ----------
    values : sequence of numbers
        Must support ``len()`` and integer indexing.

    Returns
    -------
    numpy.ndarray
        1-D float array where ``output[i] == 1.0 / values[i]``.
    """
    output = np.empty(len(values))
    for i in range(len(values)):
        output[i] = 1.0 / values[i]
    return output


# Module-level statements: they must start at column 0. Indented to the
# function's body level they sit after `return` inside compute_reciprocals
# and never run.
values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

In [31]:
# One million random integers, then time the element-by-element Python loop over all of them.
big_array = np.random.randint(1, 100, size=1000000)
%timeit compute_reciprocals(big_array)

4.68 s ± 176 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
# min() returns a NumPy scalar rather than a built-in int (int32 in the recorded output; the width may differ by platform).
print(type(big_array.min()))

<class 'numpy.int32'>


In [54]:
# Print the built-in documentation for np.zeros_like.
help(np.zeros_like)

Help on _ArrayFunctionDispatcher in module numpy:

zeros_like(a, dtype=None, order='K', subok=True, shape=None, *, device=None)
    Return an array of zeros with the same shape and type as a given array.

    Parameters
    ----------
    a : array_like
        The shape and data-type of `a` define these same attributes of
        the returned array.
    dtype : data-type, optional
        Overrides the data type of the result.
    order : {'C', 'F', 'A', or 'K'}, optional
        Overrides the memory layout of the result. 'C' means C-order,
        'F' means F-order, 'A' means 'F' if `a` is Fortran contiguous,
        'C' otherwise. 'K' means match the layout of `a` as closely
        as possible.
    subok : bool, optional.
        If True, then the newly created array will use the sub-class
        type of `a`, otherwise it will be a base-class array. Defaults
        to True.
    shape : int or sequence of ints, optional.
        Overrides the shape of the result. If order='K' and 

In [55]:
# Print the built-in documentation for np.searchsorted.
help( np.searchsorted)

Help on _ArrayFunctionDispatcher in module numpy:

searchsorted(a, v, side='left', sorter=None)
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `a` such that, if the
    corresponding elements in `v` were inserted before the indices, the
    order of `a` would be preserved.

    Assuming that `a` is sorted:

    `side`  returned index `i` satisfies
    left    ``a[i-1] < v <= a[i]``
    right   ``a[i-1] <= v < a[i]``

    Parameters
    ----------
    a : 1-D array_like
        Input array. If `sorter` is None, then it must be sorted in
        ascending order, otherwise `sorter` must be an array of indices
        that sort it.
    v : array_like
        Values to insert into `a`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the lengt

In [9]:
# Structured array of four zero-initialised records. Every record has three
# named fields: 'name' (unicode, up to 10 chars), 'age' (4-byte int) and
# 'weight' (8-byte float).
data = np.zeros(
    4,
    dtype={
        'names': ('name', 'age', 'weight'),
        'formats': ('U10', 'i4', 'f8'),
    },
)
print(data.dtype)
print(data)

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]
[('', 0, 0.) ('', 0, 0.) ('', 0, 0.) ('', 0, 0.)]


### Pandas

In [2]:
import pandas as pd

In [6]:
# A Series built from a list gets a default integer index (0, 1, 2, ...).
data = pd.Series([0.25,0.5,0.75,1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [9]:
# Same values, this time with explicit string labels as the index.
data = pd.Series([0.25,0.5,0.75,1.0],
                index=['a','b','c','d'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
# Population per state, keyed by state name.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}

# A dict maps directly onto a Series: keys become the index, values the data.
population = pd.Series(population_dict)
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [12]:
# Area per state, keyed by the same state names as the population data.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
area = pd.Series(area_dict)
area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64

In [14]:
# Combine the two Series into one DataFrame: each dict key becomes a column,
# and the rows are matched on the shared state-name index.
states = pd.DataFrame({'population':population,
                       'area':area})
states

Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [15]:
# Row labels of the DataFrame.
states.index

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object')

In [16]:
# Column labels of the DataFrame.
states.columns

Index(['population', 'area'], dtype='object')

In [27]:
# states['California'] fails: [] on a DataFrame looks up column labels, and 'California' is a row label.
states['population'] # a column can be selected by its name

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
Name: population, dtype: int64

In [30]:
# One dict per row; the dict keys 'a' and 'b' become the DataFrame columns.
data = [{'a': n, 'b': n * 2} for n in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [42]:
# The records do not share all keys; cells for a missing key are left empty
# (NaN), which is why columns 'a' and 'c' come out as floats in the output.
data = pd.DataFrame([{'a':1,'b':2},{'b':3,'c':4}])
data

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [64]:
# Dedicated RandomState seeded with 40 so the random values are reproducible.
seededrandom = np.random.RandomState(40)
# A 3x2 array becomes a DataFrame with explicit column names and row labels.
pd.DataFrame(seededrandom.rand(3,2),
             columns=['foo','bar'],
             index=['a','b','c'])

Unnamed: 0,foo,bar
a,0.407687,0.055366
b,0.788535,0.287305
c,0.450351,0.303912


In [68]:
# A structured array converts directly: its field names 'A' and 'B' become the columns.
A = np.zeros(3, dtype=[('A','i8'),('B','f8')])
pd.DataFrame(A)

Unnamed: 0,A,B
0,0,0.0
1,0,0.0
2,0,0.0


### Index

In [69]:
# A standalone Index object built from a list of integers.
ind = pd.Index([2,4,6,8])
ind

Index([2, 4, 6, 8], dtype='int64')

In [70]:
ind[0] # for the most part an Index behaves like a NumPy array

np.int64(2)

In [74]:
ind[::3] # slicing works as well

Index([2, 8], dtype='int64')

In [75]:
# An Index is immutable, so item assignment is rejected. The TypeError is the
# point of this cell; catch it and print it so that "Restart & Run All" can
# continue past this demonstration instead of stopping here.
try:
    ind[0] = 4
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Index does not support mutable operations

In [78]:
# Series with string labels as its index.
data = pd.Series([0.25, 0.5, 0.75, 1.0],
index=['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [80]:
# Assigning to a label that does not exist yet appends a new entry (data is modified in place).
data['e'] = 1.25
data

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64

In [81]:
# Slicing by label: the end label 'c' is included in the result.
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [82]:
# Slicing by integer position: positions 0, 1, 2 are returned and the end position 3 is excluded.
data[0:3]

a    0.25
b    0.50
c    0.75
dtype: float64

In [83]:
# Single-column DataFrame whose integer row labels (1, 3, 5) differ from the row positions (0, 1, 2).
data = pd.DataFrame(['a','b','c'], index=[1,3,5])
data

Unnamed: 0,0
1,a
3,b
5,c


In [91]:
# data[0] selects the column named 0; [1] then looks up the row LABEL 1 (not position 1), hence 'a'.
data[0][1] # explicit index when indexing

'a'

In [93]:
# A plain integer slice goes by row POSITION: positions 1 and 2, i.e. the rows labelled 3 and 5.
data[1:3] # implicit index when slicing

Unnamed: 0,0
3,b
5,c


In [104]:
print(data.loc[1]) # explicit: .loc uses the index labels, so this is the row labelled 1
print(data.loc[1:3]) # explicit: label slice, the end label 3 is included
print(data.iloc[1]) # implicit: .iloc uses integer positions, so this is the second row (label 3)
print(data.iloc[1:3])# implicit: positional slice, the end position 3 is excluded

0    a
Name: 1, dtype: object
   0
1  a
3  b
0    b
Name: 3, dtype: object
   0
3  b
5  c
