In [None]:
#1. Import libraries/packages
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_squared_error

#2. Loading the data
data = pd.read_csv('datasets/Fish.csv')
print(data.sample(5))
print(data.shape)
print(data.isnull().sum())
print(data.columns)
# dropna() returns a new frame instead of modifying `data`; the result has to be
# assigned, otherwise rows with missing values are still there when the model is fitted.
data = data.dropna()
print(data.shape)
X = data[['Length1','Length2','Length3','Height','Width']]
y = data['Weight']

#3. Visualizing the data
# One panel per length measurement; Height and Width share the fourth panel.
fig, axes = plt.subplots(2, 2)
for ax, column in zip(axes.flat, ['Length1', 'Length2', 'Length3']):
    ax.scatter(X[column], y, color='red', label=column)
axes[1, 1].scatter(X['Height'], y, color='blue', label='Height')
axes[1, 1].scatter(X['Width'], y, color='green', label='Width')
for ax in axes.flat:
    ax.set_ylabel('Weight')
    ax.legend()  # the scatter labels are only visible once a legend is drawn
fig.tight_layout()
plt.show()

#4. Splitting the data into a training set (70%) and a test set (30%)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=15)


#5. Fitting a linear regression on all five features
regressor = LinearRegression()
regressor.fit(X_train, y_train)
# score() on the held-out test set (R^2 for scikit-learn regressors)
accuracy = regressor.score(X_test, y_test)
print("\n\n Accuracy of model =",accuracy)
print("Coeficients",regressor.coef_)
print("Intercepts",regressor.intercept_)

#6. Residual analysis(Check the results of model fitting to know whether the model is satisfactory)
def _plot_fit(points_X, points_y, length_color, title):
    """Show observed weights per feature with the training-set predictions overlaid.

    points_X / points_y: the split whose observations are drawn as dots.
    length_color: dot colour for the three length panels (Height is always
        green and Width always blue, as in the shared fourth panel).
    title: figure-level title.

    The red line is regressor.predict(X_train) drawn against each feature.
    The points are sorted by that feature first; plotting them in row order
    joins unrelated neighbours and yields an unreadable scribble.
    """
    train_pred = regressor.predict(X_train)
    fig, axes = plt.subplots(2, 2)
    panels = [['Length1'], ['Length2'], ['Length3'], ['Height', 'Width']]
    fixed_colors = {'Height': 'green', 'Width': 'blue'}
    for ax, columns in zip(axes.flat, panels):
        for column in columns:
            ax.scatter(points_X[column], points_y,
                       color=fixed_colors.get(column, length_color), label=column)
            feature = X_train[column].to_numpy()
            order = np.argsort(feature)
            ax.plot(feature[order], train_pred[order], color="red", linewidth=1)
        # label every panel, not just the last one created
        ax.set_xlabel(' / '.join(columns))
        ax.set_ylabel('Weight')
        ax.legend()
    fig.suptitle(title)
    fig.tight_layout()
    plt.show()

_plot_fit(X_test, y_test, 'green', 'Regression(Test Set)')
# The original training figure omitted the Length3 observations; the helper draws all panels.
_plot_fit(X_train, y_train, 'blue', 'regression training set')

#7. Predictions on the test set (apply the model)
y_pred = regressor.predict(X_test)
print(f"r2 score {r2_score(y_test,y_pred)}")
# mean_squared_error returns the mean of the squared errors, so label it as such
print(f"mean squared error {mean_squared_error(y_test,y_pred)}")

# The model was fitted on a DataFrame with named columns; pass the new sample
# with the same column names so the values are matched to the right features.
x = pd.DataFrame([[23.2,25.4,30,11.52,4.02]], columns=X.columns)
Weight = regressor.predict(x)
print(f"Weight will be : {Weight}")

In [None]:
#1. Import the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

#2. Build the sample transactions and encode them
# Every inner list is one basket of purchased items.
transactions = [
    ['Bread', 'Milk'],
    ['Bread', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Milk', 'Diaper', 'Beer'],
    ['Bread', 'Milk', 'Diaper', 'Coke'],
]

# Fit the encoder on the baskets and turn them into an item-indicator table
# whose columns are the item names learned by the encoder.
te = TransactionEncoder()
te_arrary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_arrary, columns=te.columns_)
print(df)

#3. Frequent itemsets with a minimum support of 0.5
freq_items = apriori(df, use_colnames=True, min_support=0.5)
print(freq_items)

#4. Association rules filtered on support, ordered by support then confidence (both descending)
rules = association_rules(freq_items, metric='support', min_threshold=0.05).sort_values(
    by=['support', 'confidence'], ascending=False
)
print(rules)

In [None]:
#1. Import the libraries
import csv
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import pandas as pd

#2. Read the data, encode the data
# Each CSV row is one transaction (a list of item strings).
# The csv module expects its file object to be opened with newline='' so that
# line endings inside quoted fields are handled by the reader itself.
with open('datasets/basket.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    dataset = list(reader)
te = TransactionEncoder()
data = te.fit_transform(dataset)
data = pd.DataFrame(data, columns=te.columns_)
print(data)
print(data.shape)

#3. Find the frequent Itemsets
freq_items = apriori(data, min_support=0.5, use_colnames=True)
print(freq_items)

#4. Generate the association rules
# Three rule sets are built (filtered on support, confidence and lift); only the
# support-based one is sorted and printed below.
rules = association_rules(freq_items, metric='support', min_threshold=0.05)
rules1 = association_rules(freq_items, metric='confidence', min_threshold=0.8)
rules2 = association_rules(freq_items, metric='lift', min_threshold=10)
rules = rules.sort_values(['support','confidence'], ascending=[False,False])
print(rules)

# The notes below used to be bare triple-quoted strings. A string literal that is
# the last statement of a cell is echoed as the cell's output, so they are kept
# as real comments instead.
#
# Optional rule filtering (disabled):
# rules['ante_len'] = rules['antecedents'].apply(lambda x:len(x))
# nrules = rules[(rules['ante_len'] <= 3) &
#                (rules['confidence']>= 0.9) &
#                (rules['lift'] >= 2.0)]
# nrules = rules[rules['consequents'] == {'whole milk'}]
# nrules = rules[rules['antecedents'] == {'cereals','curd'}]
# nrules
# nrules = rules[['antecedents','consequents','confidence']]
# nrules.head()
# nrules.to_csv('rules.csv')
#
# Apriori algorithm
#
# 4. Now, Convert Pandas DataFrame into a list of lists for encoding
# transactions = []
# for i in range(0, len(df)):
#     transactions.append([str(df.values[i,j]) for j in range(0, len(df.columns))])
# 5. Apply TransformEncoder to the transactions list
# 6. Apply the apriori algorithm

In [None]:
#1. Import the libraries
import csv
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import pandas as pd

#2. Read the data, encode the data
def _read_transactions(path):
    """Return the rows of the CSV file at `path` as a list of lists of strings.

    The file is opened with newline='' as the csv module expects of the file
    object handed to csv.reader.
    """
    with open(path, 'r', newline='') as csvfile:
        return list(csv.reader(csvfile, delimiter=','))


# --- groceries.csv ---------------------------------------------------------
# This cell used to contain a pasted second copy of the pipeline (including a
# repeated import block) that reused the same variable names, so everything
# computed for groceries.csv was overwritten by the basket.csv run below.
# The groceries results now live under their own names.
groceries_dataset = _read_transactions('datasets/groceries.csv')
groceries_te = TransactionEncoder()
groceries_data = pd.DataFrame(groceries_te.fit_transform(groceries_dataset),
                              columns=groceries_te.columns_)
print(groceries_data)
print(groceries_data.shape)

#3. Find the frequent Itemsets
groceries_freq_items = apriori(groceries_data, min_support=0.5, use_colnames=True)
print(groceries_freq_items)

#4. Generate the association rules
# NOTE(review): confirm min_support=0.5 actually yields itemsets for this file;
# association_rules may reject an empty frequent-itemset table.
groceries_rules = association_rules(groceries_freq_items, metric='support', min_threshold=0.05)

# --- basket.csv ------------------------------------------------------------
#2. Read the data, encode the data
dataset = _read_transactions('datasets/basket.csv')
te = TransactionEncoder()
data = te.fit_transform(dataset)
data = pd.DataFrame(data, columns=te.columns_)
print(data)
print(data.shape)

#3. Find the frequent Itemsets
freq_items = apriori(data, min_support=0.5, use_colnames=True)
print(freq_items)

#4. Generate the association rules
# Three rule sets are built (filtered on support, confidence and lift); only the
# support-based one is sorted and printed.
rules = association_rules(freq_items, metric='support', min_threshold=0.05)
rules1 = association_rules(freq_items, metric='confidence', min_threshold=0.8)
rules2 = association_rules(freq_items, metric='lift', min_threshold=10)
rules = rules.sort_values(['support','confidence'], ascending=[False,False])
print(rules)

# The notes below used to be bare triple-quoted strings. A string literal that is
# the last statement of a cell is echoed as the cell's output, so they are kept
# as real comments instead.
#
# Optional rule filtering (disabled):
# rules['ante_len'] = rules['antecedents'].apply(lambda x:len(x))
# nrules = rules[(rules['ante_len'] <= 3) &
#                (rules['confidence']>= 0.9) &
#                (rules['lift'] >= 2.0)]
# nrules = rules[rules['consequents'] == {'whole milk'}]
# nrules = rules[rules['antecedents'] == {'cereals','curd'}]
# nrules
# nrules = rules[['antecedents','consequents','confidence']]
# nrules.head()
# nrules.to_csv('rules.csv')
#
# Apriori algorithm
#
# 4. Now, Convert Pandas DataFrame into a list of lists for encoding
# transactions = []
# for i in range(0, len(df)):
#     transactions.append([str(df.values[i,j]) for j in range(0, len(df.columns))])
# 5. Apply TransformEncoder to the transactions list
# 6. Apply the apriori algorithm



In [None]:
#Import the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

#1. Build the sample transactions and encode them
# Every inner list is one basket of purchased items.
transactions = [
    ['eggs', 'milk', 'bread'],
    ['eggs', 'apple'],
    ['milk', 'bread'],
    ['apple', 'milk'],
    ['milk', 'apple', 'bread'],
]

# Fit the encoder on the baskets and turn them into an item-indicator table
# whose columns are the item names learned by the encoder.
te = TransactionEncoder()
te_arrary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_arrary, columns=te.columns_)
print(df)

#2. Frequent itemsets with a minimum support of 0.5
freq_items = apriori(df, use_colnames=True, min_support=0.5)
print(freq_items)

#3. Association rules filtered on support, ordered by support then confidence (both descending)
rules = association_rules(freq_items, metric='support', min_threshold=0.05).sort_values(
    by=['support', 'confidence'], ascending=False
)
print(rules)

In [None]:
#Import the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

#1. Sample baskets, one list of item names per transaction
transactions = [
    ['eggs', 'milk', 'bread'],
    ['eggs', 'apple'],
    ['milk', 'bread'],
    ['apple', 'milk'],
    ['milk', 'apple', 'bread'],
]

# Encode the baskets into a table with one column per item name.
te = TransactionEncoder()
te_arrary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_arrary, columns=te.columns_)
print(df)

#2. Itemsets whose support is at least 0.5
freq_items = apriori(df, use_colnames=True, min_support=0.5)
print(freq_items)

#3. Rules with support >= 0.05, highest support first and confidence as tie-breaker
unsorted_rules = association_rules(freq_items, metric='support', min_threshold=0.05)
rules = unsorted_rules.sort_values(by=['support', 'confidence'], ascending=False)
print(rules)

In [None]:
#Import the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

#1. Read the data, encode the data
# Sample baskets, one list of item names per transaction.
# Item names must match exactly: the second basket previously contained
# ' flour' (with a leading space), which the encoder treats as a different
# item from 'flour' in the last basket and so splits its support in two.
transactions = [['butter','bread','milk'],
                ['butter','flour','milk','sugar'],
                ['butter', 'eggs', 'milk','salt'],
                ['eggs'],
                ['butter','flour','milk','salt']]

# Transform the baskets into the item-indicator table expected by apriori,
# with one column per item name learned by the encoder.
te=TransactionEncoder()
te_arrary=te.fit(transactions).transform(transactions)
df=pd.DataFrame(te_arrary,columns=te.columns_)
print(df)

#2. Find the frequent itemsets (support of at least 0.5)
freq_items=apriori(df,min_support=0.5,use_colnames=True)
print(freq_items)

#3. Generate the association rules, highest support first and confidence as tie-breaker
rules = association_rules(freq_items,metric='support',min_threshold=0.05)
rules = rules.sort_values(['support','confidence'],ascending=[False,False])
print(rules)

In [None]:
#Import the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

#1. Sample baskets, one list of item names per transaction
transactions = [
    ['Bread', 'Milk'],
    ['Bread', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Milk', 'Diaper', 'Beer'],
    ['Bread', 'Milk', 'Diaper', 'Coke'],
]

# Encode the baskets into a table with one column per item name.
te = TransactionEncoder()
te_arrary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_arrary, columns=te.columns_)
print(df)

#2. Itemsets whose support is at least 0.5
freq_items = apriori(df, use_colnames=True, min_support=0.5)
print(freq_items)

#3. Rules with support >= 0.05, highest support first and confidence as tie-breaker
unsorted_rules = association_rules(freq_items, metric='support', min_threshold=0.05)
rules = unsorted_rules.sort_values(by=['support', 'confidence'], ascending=False)
print(rules)

In [None]:
#Import the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

#1. Build the sample transactions and encode them
# Every inner list is one basket of purchased items.
transactions = [
    ['Apple', 'Mango', 'Banana'],
    ['Mango', 'Banana', 'Cabbage', 'Carrots'],
    ['Mango', 'Banana', 'Carrots'],
    ['Mango', 'Carrots'],
]

# Fit the encoder on the baskets and turn them into an item-indicator table
# whose columns are the item names learned by the encoder.
te = TransactionEncoder()
te_arrary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_arrary, columns=te.columns_)
print(df)

#2. Frequent itemsets with a minimum support of 0.5
freq_items = apriori(df, use_colnames=True, min_support=0.5)
print(freq_items)

#3. Association rules filtered on support, ordered by support then confidence (both descending)
rules = association_rules(freq_items, metric='support', min_threshold=0.05).sort_values(
    by=['support', 'confidence'], ascending=False
)
print(rules)

In [None]:
#1. Import libraries/packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_squared_error

#2. Loading the data
df = pd.read_csv('datasets/User_Data.csv')
print(df.sample(5))
print(df.shape)
print(df.value_counts())
print(df.isnull().sum())
print(df.columns)
# dropna() returns a new frame instead of modifying `df`; the result has to be
# assigned, otherwise rows with missing values are still used for training.
df = df.dropna()
print(df.shape)

# Single feature and target, both kept 2-D (n_samples, 1).
# NOTE(review): 'Purchased' looks like a yes/no label; if so, a classifier such as
# logistic regression is probably the better model - confirm against the dataset.
X=np.array(df[['EstimatedSalary']])
y=np.array(df[['Purchased']])
# (A Height/Weight variant of this cell would use X = df[['Height']], y = df['Weight'].)
print(X.shape)
print(y.shape)

#3. Visualizing the data
plt.scatter(X,y,color="red")
plt.title('Estimated Salary vs Purchased')
plt.xlabel('Estimated Salary')
plt.ylabel('Purchased')
plt.show()

#4. Splitting the data into a training set (70%) and a test set (30%)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=15)

#5. Performing simple linear regression
regressor= LinearRegression()
regressor.fit(X_train,y_train)
# score() on the held-out test set (R^2 for scikit-learn regressors)
accuracy = regressor.score(X_test,y_test)
print("\n\n Accuracy of model =",accuracy)
print("Coeficients",regressor.coef_)
print("Intercepts",regressor.intercept_)

#6. Residual analysis(Check the results of model fitting to know whether the model is satisfactory)
plt.scatter(X_test,y_test,color='green')
plt.plot(X_train,regressor.predict(X_train),color="red",linewidth=3)
plt.title('Regression(Test Set)')
plt.xlabel('Estimated Salary')
plt.ylabel('Purchased')
plt.show()
plt.scatter(X_train,y_train,color="blue")
plt.plot(X_train,regressor.predict(X_train),color="red",linewidth=3)
plt.title('regression training set')
plt.xlabel('Estimated Salary')
plt.ylabel('Purchased')
plt.show()

#7. Predictions on the test set (apply the model)
y_pred=regressor.predict(X_test)
print(f"r2 score {r2_score(y_test,y_pred)}")
# mean_squared_error returns the mean of the squared errors, so label it as such
print(f"mean squared error {mean_squared_error(y_test,y_pred)}")

# Predict for one new observation. The model's only input is EstimatedSalary;
# the earlier `Height = 165` / "Weight will be" lines were left over from a
# height/weight example and fed a height into the salary model.
EstimatedSalary = 87000  # example value; replace with the salary of interest
result=regressor.predict(np.array(EstimatedSalary).reshape(1,-1))
Purchased=result[0,0]  # y is 2-D, so the prediction comes back with shape (1, 1)
print(f"Predicted Purchased value will be : {Purchased}")

In [None]:
#Import the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

#1. Build the sample transactions and encode them
# Every inner list is one basket of purchased items.
transactions = [
    ['Bread', 'Milk'],
    ['Bread', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Coke'],
    ['Bread', 'Milk', 'Diaper', 'Beer'],
    ['Bread', 'Milk', 'Diaper', 'Coke'],
]

# Fit the encoder on the baskets and turn them into an item-indicator table
# whose columns are the item names learned by the encoder.
te = TransactionEncoder()
te_arrary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_arrary, columns=te.columns_)
print(df)

#2. Frequent itemsets with a minimum support of 0.5
freq_items = apriori(df, use_colnames=True, min_support=0.5)
print(freq_items)

#3. Association rules filtered on support, ordered by support then confidence (both descending)
rules = association_rules(freq_items, metric='support', min_threshold=0.05).sort_values(
    by=['support', 'confidence'], ascending=False
)
print(rules)

In [None]:
#1. Import libraries/packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_squared_error

#2. Load the car data and keep only the rows whose Make is 'Audi'
df = pd.read_csv('datasets/Car.csv')
df = df.loc[df['Make'] == 'Audi']
print(df.shape)
print(df.isnull().sum())
print(df.columns)
df = df.dropna()
print(df.shape)
print(df.sample(5))

# Feature and target as 2-D arrays of shape (n_samples, 1)
X = df[['EngineHP']].to_numpy(copy=True)
y = df[['MSRP']].to_numpy(copy=True)

#3. Scatter of the raw data
plt.scatter(X, y, color="red")
plt.title('EngineHP vs MSRP')
plt.xlabel('EngineHP')
plt.ylabel('MSRP')
plt.show()

#4. Hold out 30% of the rows as a test set (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=15
)

#5. Fit a one-feature linear regression and report its test-set score
regressor = LinearRegression()
regressor.fit(X_train, y_train)
accuracy = regressor.score(X_test, y_test)
print("\n\n Accuracy of model =",accuracy)
print("Coeficients",regressor.coef_)
print("Intercepts",regressor.intercept_)

#6. Fitted line over the test points, then over the training points
for points_X, points_y, point_color, plot_title in (
    (X_test, y_test, 'green', 'Regression(Test Set)'),
    (X_train, y_train, "blue", 'regression training set'),
):
    plt.scatter(points_X, points_y, color=point_color)
    # the line is always the model's prediction on the training inputs
    plt.plot(X_train, regressor.predict(X_train), color="red", linewidth=3)
    plt.title(plot_title)
    plt.xlabel('EngineHP')
    plt.ylabel('MSRP')
    plt.show()

#7. Evaluate on the test set, then predict for a single new value
y_pred = regressor.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(f"r2 score {r2}")
print(f"mean error {mse}")

EngineHP = 276
result = regressor.predict(np.array([[EngineHP]]))
MSRP = result[0][0]
print(f"MSRP will be : {MSRP}")

In [None]:
#Import the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

#1. Sample baskets, one list of item names per transaction
transactions = [
    ['eggs', 'milk', 'bread'],
    ['eggs', 'apple'],
    ['milk', 'bread'],
    ['apple', 'milk'],
    ['milk', 'apple', 'bread'],
]

# Encode the baskets into a table with one column per item name.
te = TransactionEncoder()
te_arrary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_arrary, columns=te.columns_)
print(df)

#2. Itemsets whose support is at least 0.5
freq_items = apriori(df, use_colnames=True, min_support=0.5)
print(freq_items)

#3. Rules with support >= 0.05, highest support first and confidence as tie-breaker
unsorted_rules = association_rules(freq_items, metric='support', min_threshold=0.05)
rules = unsorted_rules.sort_values(by=['support', 'confidence'], ascending=False)
print(rules)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset and take a first look at it
df = pd.read_csv("data.csv")
print(df)
print(df.head())
print(df.tail())
print("Dataset Info")
#q2 - dimensions, number of cells and column types
print(df.shape)
print(df.size)
print(df.dtypes)
#q3 - summary statistics
print("basic statisticsl details:")
print(df.describe())
#q4 - missing values, duplicates and record count
# These calls previously read `df.isna(),sum()` / `df.duplicated(),sum()`: the comma
# passed the frame to print and then called the builtin sum() with no arguments,
# which raises TypeError. The intended call is the .sum() method.
print("\n sum of None values\n",df.isna().sum())
print("\n total duplicate values\n",df.duplicated().sum())
# df.size is an integer attribute (rows * columns), not a method; the number of
# records is the number of rows.
print("\n total no. of record \n",len(df))
#q5 - body mass index
# NOTE(review): the usual BMI formula expects weight in kg and height in metres - confirm the units in data.csv
df['BMI'] = (df['weight'])/(df['height']**2)
#q6 - Series.max()/min() skip missing values, unlike the builtins
print("maximum BMI : ",df['BMI'].max())
print("minimum BMI : ",df['BMI'].min())
#q7 - height against weight
plt.scatter(df['height'],df['weight'])
plt.xlabel('height')
plt.ylabel('weight')
plt.show()