# Statsmodels

In [2]:
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [4]:
data = sm.datasets.get_rdataset('Guerry','HistData').data

In [5]:
data.head()

Unnamed: 0,dept,Region,Department,Crime_pers,Crime_prop,Literacy,Donations,Infants,Suicides,MainCity,...,Crime_parents,Infanticide,Donation_clergy,Lottery,Desertion,Instruction,Prostitutes,Distance,Area,Pop1831
0,1,E,Ain,28870,15890,37,5098,33120,35039,2:Med,...,71,60,69,41,55,46,13,218.372,5762,346.03
1,2,N,Aisne,26226,5521,51,8901,14572,12831,2:Med,...,4,82,36,38,82,24,327,65.945,7369,513.0
2,3,C,Allier,26747,7925,13,10973,17044,114121,2:Med,...,46,42,76,66,16,85,34,161.927,7340,298.26
3,4,E,Basses-Alpes,12935,7289,46,2733,23018,14238,1:Sm,...,70,12,37,80,32,29,2,351.399,6925,155.9
4,5,E,Hautes-Alpes,17488,8174,69,6962,23076,16171,1:Sm,...,22,23,64,79,35,7,1,320.28,5549,129.1


In [8]:
results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)',data = data).fit()

In [10]:
results.summary()

0,1,2,3
Dep. Variable:,Lottery,R-squared:,0.348
Model:,OLS,Adj. R-squared:,0.333
Method:,Least Squares,F-statistic:,22.2
Date:,"Tue, 05 Mar 2024",Prob (F-statistic):,1.9e-08
Time:,01:41:44,Log-Likelihood:,-379.82
No. Observations:,86,AIC:,765.6
Df Residuals:,83,BIC:,773.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,246.4341,35.233,6.995,0.000,176.358,316.510
Literacy,-0.4889,0.128,-3.832,0.000,-0.743,-0.235
np.log(Pop1831),-31.3114,5.977,-5.239,0.000,-43.199,-19.424

0,1,2,3
Omnibus:,3.713,Durbin-Watson:,2.019
Prob(Omnibus):,0.156,Jarque-Bera (JB):,3.394
Skew:,-0.487,Prob(JB):,0.183
Kurtosis:,3.003,Cond. No.,702.0


In [11]:
results.params

Intercept          246.434135
Literacy            -0.488923
np.log(Pop1831)    -31.311392
dtype: float64

In [12]:
results.rsquared

0.3484706112599609

In [13]:
results.pvalues

Intercept          6.260771e-10
Literacy           2.462102e-04
np.log(Pop1831)    1.202925e-06
dtype: float64

# Scipy

In [14]:
from scipy import constants

In [15]:
dir(constants)

['Avogadro',
 'Boltzmann',
 'Btu',
 'Btu_IT',
 'Btu_th',
 'G',
 'Julian_year',
 'N_A',
 'Planck',
 'R',
 'Rydberg',
 'Stefan_Boltzmann',
 'Wien',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_codata',
 '_constants',
 '_obsolete_constants',
 'acre',
 'alpha',
 'angstrom',
 'arcmin',
 'arcminute',
 'arcsec',
 'arcsecond',
 'astronomical_unit',
 'atm',
 'atmosphere',
 'atomic_mass',
 'atto',
 'au',
 'bar',
 'barrel',
 'bbl',
 'blob',
 'c',
 'calorie',
 'calorie_IT',
 'calorie_th',
 'carat',
 'centi',
 'codata',
 'constants',
 'convert_temperature',
 'day',
 'deci',
 'degree',
 'degree_Fahrenheit',
 'deka',
 'dyn',
 'dyne',
 'e',
 'eV',
 'electron_mass',
 'electron_volt',
 'elementary_charge',
 'epsilon_0',
 'erg',
 'exa',
 'exbi',
 'femto',
 'fermi',
 'find',
 'fine_structure',
 'fluid_ounce',
 'fluid_ounce_US',
 'fluid_ounce_imp',
 'foot',
 'g',
 'gallon',
 'gallon_US',
 'gallon_imp',
 'gas_co

In [16]:
constants.pi

3.141592653589793

In [17]:
constants.gram

0.001

In [18]:
constants.metric_ton

1000.0

In [19]:
constants.degree

0.017453292519943295

In [20]:
import numpy as np
np.pi/180

0.017453292519943295

In [21]:
constants.pi/180

0.017453292519943295

In [22]:
np.sin(45*constants.degree)

0.7071067811865476

# Numpy

In [23]:
import numpy as np

In [24]:
np.array(24)

array(24)

In [25]:
np.array([1,2,3])

array([1, 2, 3])

In [27]:
np.array([[1,2,3],[4,5,6]])

array([[1, 2, 3],
       [4, 5, 6]])

In [29]:
np.array([[[1,1,1,],[2,2,2,]],[[3,3,3],[4,4,4]]])

array([[[1, 1, 1],
        [2, 2, 2]],

       [[3, 3, 3],
        [4, 4, 4]]])

In [30]:
nump_ar = np.array([[[1,1,1,],[2,2,2,]],[[3,3,3],[4,4,4]]])

In [32]:
nump_ar.shape

(2, 2, 3)

In [34]:
nump_ar.ndim

3

In [35]:
nump_are = np.array([x for x in range(1,10)])

In [36]:
nump_are

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

# Arithmetic Operations

In [37]:
A = np.array([[1,1,1],[2,2,2]])
B = np.array([3,3,3])

In [38]:
np.add(A,B)

array([[4, 4, 4],
       [5, 5, 5]])

In [40]:
np.subtract(A,B)

array([[-2, -2, -2],
       [-1, -1, -1]])

In [41]:
np.multiply(A,B)

array([[3, 3, 3],
       [6, 6, 6]])

In [42]:
np.divide(A,B)

array([[0.33333333, 0.33333333, 0.33333333],
       [0.66666667, 0.66666667, 0.66666667]])

In [44]:
np.power(A,5)

array([[ 1,  1,  1],
       [32, 32, 32]], dtype=int32)

# Conditional Logic

In [45]:
x = np.array([x for x in range(10)])

In [46]:
np.where(x%2==0,'Even','Odd')

array(['Even', 'Odd', 'Even', 'Odd', 'Even', 'Odd', 'Even', 'Odd', 'Even',
       'Odd'], dtype='<U4')

In [47]:
condlist = [x<5,x>5]
choicelist = [x**4,x**2]

In [48]:
np.select(condlist,choicelist,default=x)

array([  0,   1,  16,  81, 256,   5,  36,  49,  64,  81])

# Common Mathematical and Statistical Functions

In [54]:
ar = np.array([[1,2,3],[4,5,6],[7,8,9]])

In [55]:
ar

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [56]:
np.min(ar)

1

In [57]:
np.min(ar,axis=0)

array([1, 2, 3])

In [58]:
np.min(ar,axis=1)

array([1, 4, 7])

In [59]:
np.max(ar)

9

In [60]:
np.amax(ar)

9

In [61]:
np.median(ar)

5.0

In [62]:
np.mean(ar)

5.0

In [63]:
np.std(ar)

2.581988897471611

In [64]:
np.var(ar)

6.666666666666667

In [65]:
np.percentile(ar,50)

5.0

In [67]:
deg = np.array([0,30,45,60,90])

In [68]:
np.sin(deg*np.pi/180)

array([0.        , 0.5       , 0.70710678, 0.8660254 , 1.        ])

In [69]:
np.arcsin(deg*np.pi/180)  # inverse of sin

  np.arcsin(deg*np.pi/180)


array([0.        , 0.55106958, 0.90333911,        nan,        nan])

In [70]:
ar2 = np.array([0.1,0.5,0.8,-0.1,-0.5,-0.8])

In [71]:
np.floor(ar2)

array([ 0.,  0.,  0., -1., -1., -1.])

In [72]:
np.ceil(ar2)

array([ 1.,  1.,  1., -0., -0., -0.])

In [73]:
np.round(ar2)

array([ 0.,  0.,  1., -0., -0., -1.])

# Indexing and Slicing

In [74]:
ar1d = np.array([1,2,3])
ar2d = np.array([[1,2,3],[4,5,6]])
ar3d = np.array([[[1,1,1,],[2,2,2,]],[[3,3,3],[4,4,4]]])

In [76]:
ar1d[0]

1

In [77]:
ar2d[0,0]

1

In [78]:
ar2d[1,-1]

6

In [79]:
ar3d[0,1,2]

2

In [80]:
ar1d[0:2]

array([1, 2])

In [82]:
ar2d[0:2,1:2]

array([[2],
       [5]])

In [83]:
ar2d[-2:-3:-1]

array([[1, 2, 3]])

In [84]:
ar3d[-2:-3:-1]

array([[[1, 1, 1],
        [2, 2, 2]]])

# File Handling

In [90]:
# ar = np.loadtxt('File location\data.csv',delimiter=',',dtype=str)
# ar = np.genfromtxt('File location\data.csv',delimiter=',',dtype=str)

import pandas as pd
df = pd.read_excel(r'Downloads\Employee Data.xlsx')

In [91]:
numar = df.to_numpy()

In [92]:
numar

array([['EID_1001', 'Phillina Ober', '35 - 44', 'R&D', 30038],
       ['EID_1002', 'Filia McAdams', '45 - 54', 'R&D', 14929],
       ['EID_1003', 'Mick Brown', '35 - 44', 'Sales', 48855],
       ['EID_1004', 'Maria Etezadi', 'Under 25', 'R&D', 62186],
       ['EID_1005', 'Kean Thornton', '25 - 34', 'HR', 59714],
       ['EID_1006', "Jack O'Briant", '25 - 34', 'Sales', 35641],
       ['EID_1007', 'Maria Etezadi', 'Above 55', 'Sales', 69549],
       ['EID_1008', 'Aaron Bergman', '25 - 34', 'HR', 25868],
       ['EID_1009', 'Craig Molinari', 'Above 55', 'R&D', 79420],
       ['EID_1010', 'Lycoris Saunders', '35 - 44', 'HR', 53284],
       ['EID_1011', 'Nora Pelletier', 'Above 55', 'Sales', 71534],
       ['EID_1012', 'Craig Molinari', '25 - 34', 'R&D', 59598],
       ['EID_1013', 'Chloris Kastensmidt', '25 - 34', 'HR', 96785],
       ['EID_1014', 'Vivek Sundaresam', '45 - 54', 'R&D', 80045],
       ['EID_1015', 'Scott Williamson', '45 - 54', 'Sales', 77117],
       ['EID_1016', 'Alan Shon

In [95]:
ar2d = np.array([[1,2,3],[4,5,6]])

In [96]:
np.savetxt('2D_array.csv',ar2d,delimiter=',')

In [97]:
np.save('2D.npy',ar2d)

In [98]:
np.load('2D.npy')

array([[1, 2, 3],
       [4, 5, 6]])