In [2]:
import pandas as pd

# Sample DataFrame
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6]
}

df = pd.DataFrame(data)

# Print the original DataFrame
print("Original DataFrame:")
print(df)

# Repeat each row 3 times (including the original)
repeated_df = pd.concat([df] * 3, ignore_index=True).sort_values(by=df.index.repeat(3)).reset_index(drop=True)

# Print the modified DataFrame
print("\nModified DataFrame:")
print(repeated_df)


Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6


KeyError: Index([0, 0, 0, 1, 1, 1, 2, 2, 2], dtype='int64')

In [3]:
import pandas as pd

# Sample DataFrame
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6]
}

df = pd.DataFrame(data)

# Print the original DataFrame
print("Original DataFrame:")
print(df)

# Repeat each row 3 times (including the original)
repeated_df = df.loc[df.index.repeat(3)].reset_index(drop=True)

# Print the modified DataFrame
print("\nModified DataFrame:")
print(repeated_df)


Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6

Modified DataFrame:
   A  B
0  1  4
1  1  4
2  1  4
3  2  5
4  2  5
5  2  5
6  3  6
7  3  6
8  3  6


In [4]:
import pandas as pd

# Sample DataFrame
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6]
}

df = pd.DataFrame(data)

# Print the original DataFrame
print("Original DataFrame:")
print(df)

# Repeat each row 3 times (including the original)
repeated_df = df.loc[df.index.repeat(3)].reset_index(drop=True)

# Create the new column with 0s and set 1 at the specific index for each set of repeated rows
repeated_df['Flag'] = 0
repeated_df.loc[::3, 'Flag'] = 1  # Setting 1 at the first index of each set of repeated rows

# Print the modified DataFrame
print("\nModified DataFrame:")
print(repeated_df)


Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6

Modified DataFrame:
   A  B  Flag
0  1  4     1
1  1  4     0
2  1  4     0
3  2  5     1
4  2  5     0
5  2  5     0
6  3  6     1
7  3  6     0
8  3  6     0


In [5]:
import pandas as pd

# Sample DataFrame
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C':[1,2,1]
}

df = pd.DataFrame(data)

# Print the original DataFrame
print("Original DataFrame:")
print(df)

# Repeat each row 3 times (including the original)
repeated_df = df.loc[df.index.repeat(3)].reset_index(drop=True)

# Create the new 'Indicator' column with default 0s
repeated_df['Indicator'] = 0

# Set the 'Indicator' column to 1 at the specific indices
for idx, row in df.iterrows():
    repeated_df.loc[repeated_df.index[(idx * 3):(idx * 3 + 3)], 'Indicator'] = [1, 0, 0]

# Print the modified DataFrame
print("\nModified DataFrame:")
print(repeated_df)


Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6

Modified DataFrame:
   A  B  Indicator
0  1  4          1
1  1  4          0
2  1  4          0
3  2  5          1
4  2  5          0
5  2  5          0
6  3  6          1
7  3  6          0
8  3  6          0


In [6]:
import pandas as pd

# Sample DataFrame
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [1, 2, 1]
}

df = pd.DataFrame(data)

# Print the original DataFrame
print("Original DataFrame:")
print(df)

# Repeat each row 3 times (including the original)
repeated_df = df.loc[df.index.repeat(3)].reset_index(drop=True)

# Add the new column with 0s and 1s
repeated_df['D'] = 0
repeated_df.loc[repeated_df.index % 3 == 0, 'D'] = 1

# Print the modified DataFrame
print("\nModified DataFrame:")
print(repeated_df)


Original DataFrame:
   A  B  C
0  1  4  1
1  2  5  2
2  3  6  1

Modified DataFrame:
   A  B  C  D
0  1  4  1  1
1  1  4  1  0
2  1  4  1  0
3  2  5  2  1
4  2  5  2  0
5  2  5  2  0
6  3  6  1  1
7  3  6  1  0
8  3  6  1  0


In [9]:
import pandas as pd

# Sample DataFrame
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [1, 2, 1]
}

df = pd.DataFrame(data)

# Print the original DataFrame
print("Original DataFrame:")
print(df)

# Repeat each row 3 times (including the original)
repeated_df = df.loc[df.index.repeat(3)].reset_index(drop=True)

# Add the new column 'D' with 0s
repeated_df['D'] = 0

# Set the value to 1 at the specified indices
for original_index in df.index:
    repeated_indices = repeated_df.index[original_index*3 : original_index*3 + 3]
    repeated_df.loc[repeated_indices[df.loc[original_index, 'C']-1], 'D'] = 1

# Print the modified DataFrame
print("\nModified DataFrame:")
print(repeated_df)

original_sums = df[['A', 'B']].sum(axis=1)

# Repeat the sums to match the repeated rows
repeated_sums = original_sums.repeat(3).reset_index(drop=True)

# Add the new column 'Sum' to the repeated DataFrame
repeated_df['Sum'] = repeated_sums

# Print the modified DataFrame
print("\nModified DataFrame:")
print(repeated_df)


Original DataFrame:
   A  B  C
0  1  4  1
1  2  5  2
2  3  6  1

Modified DataFrame:
   A  B  C  D
0  1  4  1  1
1  1  4  1  0
2  1  4  1  0
3  2  5  2  0
4  2  5  2  1
5  2  5  2  0
6  3  6  1  1
7  3  6  1  0
8  3  6  1  0

Modified DataFrame:
   A  B  C  D  Sum
0  1  4  1  1    5
1  1  4  1  0    5
2  1  4  1  0    5
3  2  5  2  0    7
4  2  5  2  1    7
5  2  5  2  0    7
6  3  6  1  1    9
7  3  6  1  0    9
8  3  6  1  0    9


In [8]:
import pandas as pd

# Sample DataFrame
data = {
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [1, 2, 1]
}

df = pd.DataFrame(data)

# Print the original DataFrame
print("Original DataFrame:")
print(df)

# Repeat each row 3 times (including the original)
repeated_df = df.loc[df.index.repeat(3)].reset_index(drop=True)

# Add the new column 'D' with 0s
repeated_df['D'] = 0

# Set the value to 1 at the specified indices
for original_index in df.index:
    repeated_indices = repeated_df.index[original_index*3 : original_index*3 + 3]
    repeated_df.loc[repeated_indices[df.loc[original_index, 'C']-1], 'D'] = 1

# Calculate the sum of values of 'A' and 'B' for the same ID
original_sums = df[['A', 'B']].sum(axis=1)

# Repeat the sums to match the repeated rows
repeated_sums = original_sums.repeat(3).reset_index(drop=True)

# Add the new column 'Sum' to the repeated DataFrame
repeated_df['Sum'] = repeated_sums

# Print the modified DataFrame
print("\nModified DataFrame:")
print(repeated_df)


Original DataFrame:
   A  B  C
0  1  4  1
1  2  5  2
2  3  6  1

Modified DataFrame:
   A  B  C  D  Sum
0  1  4  1  1    5
1  1  4  1  0    5
2  1  4  1  0    5
3  2  5  2  0    7
4  2  5  2  1    7
5  2  5  2  0    7
6  3  6  1  1    9
7  3  6  1  0    9
8  3  6  1  0    9


In [13]:
import numpy as np

# Define X and theta matrices
X = np.array([
    [10, 0, 0,1],
    [1, 2, 1,1]
])
y = np.array([1,2])

theta = np.array([
    [1.27643114, -0.61920665, -2.72269309, 0.2],
    [2.1531517, -0.40006169, -0.67794147 , 0.33],
    [0.05752646, -0.7151739, -1.31920894, 1.22132]
])
print(theta.shape)
# Compute z = X * theta
z = np.dot(X, theta.T)

print(z)

ll = (z[np.arange(X.shape[0]), y])
print(ll)

(3, 4)
[[12.9643114  21.861517    1.7965846 ]
 [-2.48467525  1.00508685 -1.47071028]]
[21.861517   -1.47071028]


In [8]:
np.sum(np.dot(X, theta.T))

33.67211432

In [4]:
10*2.1531517+ 0.33

21.861517

In [12]:
print(np.exp(z))

[[3.49518370e+05 2.04559516e-03 1.49793908e-12]
 [2.81536190e+02 1.18303263e-01 4.52659639e-03]]


In [15]:
np.exp(-6.1920665)

0.0020455951600901407

In [17]:
np.sum(np.exp(z), axis=1, keepdims=True)

array([[3.49518372e+05],
       [2.81659020e+02]])

In [18]:
print(np.exp(z)/np.sum(np.exp(z), axis=1, keepdims=True))

[[9.99999994e-01 5.85261126e-09 4.28572344e-18]
 [9.99563906e-01 4.20022986e-04 1.60711927e-05]]


In [19]:
2.04559516e-03/3.49518372e+05

5.852611261304457e-09

In [21]:
np.sum(np.exp(z))

349800.0310387659

In [22]:
import numpy as np

num_samples = 5
y = [10, 20, 30, 40, 50]

result = [np.arange(num_samples), y]
print(result)


[array([0, 1, 2, 3, 4]), [10, 20, 30, 40, 50]]


In [23]:
X_null = np.ones((10, 3))  # Only intercept

In [25]:
print(X_null)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
