In [1]:
import pandas as pd
import re

In [2]:
car=pd.read_csv('mechanic dataset pune.csv')

In [3]:
def extract_address_info(address_str):
    # Define regex patterns for pincode and city extraction
    pincode_pattern = r'\b\d{6}\b'
    city_pattern = r'(?<=, )[\w\s]+'

    # Find pincode and city using regex
    pincode = re.search(pincode_pattern, address_str)
    city = re.search(city_pattern, address_str)

    return pincode.group() if pincode else None, city.group() if city else None

In [4]:
car['Pincode'], car['City'] = zip(*car['Adddress'].apply(lambda x: extract_address_info(x)))

In [5]:
print(car[['Adddress', 'Pincode', 'City']])

                                              Adddress Pincode  \
0    Dr Ambedkar Co Operative Society, Yerawada, Pu...  411006   
1    39/87, opp. ALANKAR POLICE STATION, Ganesh Nag...  411038   
2    Shitala Devi Chowk Rd, Guruwar Peth, Pune, Mah...  411042   
3    310, Ghorpade Peth Rd, Ghorpade Peth, Swargate...  411042   
4    169, NC Kelkar Road, Pate - Sampada, Pulachi W...  411030   
..                                                 ...     ...   
214  C-22, Crystal Plaza, Baner, Pune, Maharashtra ...  411045   
215  73, Silver Park, Kalyani Nagar, Pune, Maharash...  411014   
216  15, Sapphire Residency, Kharadi, Pune, Maharas...  411014   
217  87, Diamond Towers, Hadapsar, Pune, Maharashtr...  411028   
218  64, Platinum Enclave, Wakad, Pune, Maharashtra...  411057   

                   City  
0              Yerawada  
1                   opp  
2          Guruwar Peth  
3      Ghorpade Peth Rd  
4        NC Kelkar Road  
..                  ...  
214       Crystal Plaza  

In [6]:
car['tags'] = car['Adddress'] + car['Pincode'].fillna('') + car['City'].fillna('')


In [7]:
new_data = car.drop(columns=['Adddress', 'phone'])

# Vectorize the text data

In [8]:
new_data

Unnamed: 0,Name,Pincode,City,tags
0,Mateen Auto Garage,411006,Yerawada,"Dr Ambedkar Co Operative Society, Yerawada, Pu..."
1,A to Z Automobile Garage,411038,opp,"39/87, opp. ALANKAR POLICE STATION, Ganesh Nag..."
2,Aayush Auto Garage,411042,Guruwar Peth,"Shitala Devi Chowk Rd, Guruwar Peth, Pune, Mah..."
3,Suryakant Auto Garage,411042,Ghorpade Peth Rd,"310, Ghorpade Peth Rd, Ghorpade Peth, Swargate..."
4,P Garage,411030,NC Kelkar Road,"169, NC Kelkar Road, Pate - Sampada, Pulachi W..."
...,...,...,...,...
214,Patel Auto Works,411045,Crystal Plaza,"C-22, Crystal Plaza, Baner, Pune, Maharashtra ..."
215,Sharma Car Garage,411014,Silver Park,"73, Silver Park, Kalyani Nagar, Pune, Maharash..."
216,Gupta Auto Solutions,411014,Sapphire Residency,"15, Sapphire Residency, Kharadi, Pune, Maharas..."
217,Khan Car Repairs,411028,Diamond Towers,"87, Diamond Towers, Hadapsar, Pune, Maharashtr..."


In [9]:
from sklearn.feature_extraction.text import CountVectorizer

In [10]:
cv=CountVectorizer(max_features=219, stop_words='english')

In [11]:
cv

In [12]:
vector=cv.fit_transform(new_data['tags'].values.astype('U')).toarray()

In [13]:
vector.shape

(219, 219)

In [14]:
from sklearn.metrics.pairwise import cosine_similarity

In [15]:
similairty=cosine_similarity(vector)

In [16]:
similairty

array([[1.        , 0.37796447, 0.35355339, ..., 0.30151134, 0.30151134,
        0.31622777],
       [0.37796447, 1.        , 0.26726124, ..., 0.22792115, 0.34188173,
        0.23904572],
       [0.35355339, 0.26726124, 1.        , ..., 0.21320072, 0.21320072,
        0.2236068 ],
       ...,
       [0.30151134, 0.22792115, 0.21320072, ..., 1.        , 0.18181818,
        0.19069252],
       [0.30151134, 0.34188173, 0.21320072, ..., 0.18181818, 1.        ,
        0.19069252],
       [0.31622777, 0.23904572, 0.2236068 , ..., 0.19069252, 0.19069252,
        1.        ]])

In [17]:
distance=sorted(list(enumerate(similairty[2])), reverse=True,key=lambda vector:vector[1])
for i in distance[0:5]:
    print(new_data.iloc[i[0]].Name)

Aayush Auto Garage
Suryakant Auto Garage
Dharmendra Auto Garage
Patel Garage
Patel Automobile Repairs


In [18]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process



In [19]:
# def recommend(car):
#     index=new_data[new_data['tags']==car].index[0]
#     distance=sorted(list(enumerate(similairty[index])), reverse=True,key=lambda vector:vector[1])
#     for i in distance[0:5]:
#         print(new_data.iloc[i[0]].Name)

# def recommend(address):
#     # Define a threshold for fuzzy matching
#     threshold = 70
    
#     # Find similar addresses in the dataset using fuzzy matching
#     similar_addresses = process.extract(address, new_data['tags'], scorer=fuzz.token_sort_ratio)
    
#     # Extract garage names corresponding to similar addresses
#     similar_garages = []
#     for similar_address, score, index in similar_addresses:
#         if score >= threshold:
#             similar_garages.append(new_data.iloc[index]['Name'])
    
#     return similar_garages

def recommend(address, pincode, city):
    # Combine address, pincode, and city into a single string
    input_text = address + pincode + city
    
    # Define a threshold for fuzzy matching
    threshold = 70
    
    # Find similar addresses in the dataset using fuzzy matching
    similar_addresses = process.extract(input_text, new_data['tags'], scorer=fuzz.token_sort_ratio)
    
    # Extract garage names corresponding to similar addresses
    similar_garages = []
    for similar_address, score, index in similar_addresses:
        if score >= threshold:
            similar_garages.append(new_data.iloc[index]['Name'])
    
    return similar_garages


    
    

In [20]:
import pickle

In [21]:
pickle.dump(new_data,open('car_list.pkl','wb'))

In [22]:
pickle.dump(similairty, open('similarity.pkl','wb'))

In [23]:
pickle.load(open('car_list.pkl','rb'))

Unnamed: 0,Name,Pincode,City,tags
0,Mateen Auto Garage,411006,Yerawada,"Dr Ambedkar Co Operative Society, Yerawada, Pu..."
1,A to Z Automobile Garage,411038,opp,"39/87, opp. ALANKAR POLICE STATION, Ganesh Nag..."
2,Aayush Auto Garage,411042,Guruwar Peth,"Shitala Devi Chowk Rd, Guruwar Peth, Pune, Mah..."
3,Suryakant Auto Garage,411042,Ghorpade Peth Rd,"310, Ghorpade Peth Rd, Ghorpade Peth, Swargate..."
4,P Garage,411030,NC Kelkar Road,"169, NC Kelkar Road, Pate - Sampada, Pulachi W..."
...,...,...,...,...
214,Patel Auto Works,411045,Crystal Plaza,"C-22, Crystal Plaza, Baner, Pune, Maharashtra ..."
215,Sharma Car Garage,411014,Silver Park,"73, Silver Park, Kalyani Nagar, Pune, Maharash..."
216,Gupta Auto Solutions,411014,Sapphire Residency,"15, Sapphire Residency, Kharadi, Pune, Maharas..."
217,Khan Car Repairs,411028,Diamond Towers,"87, Diamond Towers, Hadapsar, Pune, Maharashtr..."


In [24]:

input_address = "56, Camp Road, Pune, Maharashtra 411001"
input_pincode = "411001"
input_city = "Pune"

# Recommend garages based on input fields
recommended_garages = recommend(input_address, input_pincode, input_city)

# Print recommended garages
if recommended_garages:
    print("Recommended Garages based on Address, Pincode, and City:")
    for garage in recommended_garages:
        print(garage)
else:
    print("No similar addresses found in the dataset.")


Recommended Garages based on Address, Pincode, and City:
Mehta Auto Solutions
Desai Car Solutions
Singh Automobile Repairs
