# Bangkit Capstone Project : Funders

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/drive/1gPsSLT7i-QIZse9PRx0O7v13qewcyeSD?usp=sharing"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/bagussatya/ml-funders/blob/main/Capstone_Notebook.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

# Dependencies & Libraries

In [1]:
#Libraries

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#Visualization
import pandas as pd

## Data Preparation

In [3]:
def get_data(link):
  df = pd.read_json(link)
  df = pd.DataFrame(df['data'].values.tolist())
  return df

In [5]:
#Importing Enterpreneurs data from database
pengusaha_df = get_data('http://35.197.140.41/api/auth/get_all_pengusaha')
pengusaha_df.head(3)

Unnamed: 0,id,name,email,alamat,no_telp,status,provinsi,kabkota,kategori,lokasi,range_fund,range_employee
0,1,Usaha Daur Ulang kertas Semen,110pengusaha@capstone.com,,89787890879,Aktif,SUMATERA UTARA,KAB. DELI SERDANG,Industri Pengolahan,SUMATERA UTARA,> 8 Juta,< 10 Orang
1,2,Usaha Nyiru Dg Kanang,111pengusaha@capstone.com,,89787890880,Aktif,SULAWESI SELATAN,KAB. GOWA,Industri Pengolahan,SULAWESI SELATAN,> 8 Juta,< 10 Orang
2,3,Ira Furniture,112pengusaha@capstone.com,,89787890881,Aktif,DKI JAKARTA,KOTA ADM. JAKARTA TIMUR,Industri Pengolahan,DKI JAKARTA,> 8 Juta,< 10 Orang


In [90]:
#Importing the Investor profile data from database
investor_df = get_data('http://35.197.140.41/api/auth/get_all_investor')
investor_df = investor_df.iloc[:1]
investor_df

Unnamed: 0,id,name,email,alamat,no_telp,status,provinsi,kabkota,kategori,lokasi,range_fund,range_employee
0,555,Oktovizurya Kurnia,okto@capstone.com,,89787891433,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang


In [11]:
# Function to clean whitespaces in columns values
def clean_data(x):
    if isinstance(x, list):
        return [str.lower(i.replace(" ", "")) for i in x]
    else:
        if isinstance(x, str):
            return str.lower(x.replace(" ", ""))
        else:
            return ''

In [66]:
# Function to soup all the features
def create_soup(x):
    return x['kategori'] + ' ' + str(x['range_fund']) + ' ' + str(x['range_employee']) + ' ' + x['lokasi']

In [14]:
# Set the features
features = ['kategori', 'range_fund', 'range_employee', 'lokasi']

In [91]:
# Combining the enterpreneurs and choosen investor data to count similarity
df_train = pengusaha_df.append(investor_df,ignore_index=True)
df_train

Unnamed: 0,id,name,email,alamat,no_telp,status,provinsi,kabkota,kategori,lokasi,range_fund,range_employee
0,1,Usaha Daur Ulang kertas Semen,110pengusaha@capstone.com,,89787890879,Aktif,SUMATERA UTARA,KAB. DELI SERDANG,Industri Pengolahan,SUMATERA UTARA,> 8 Juta,< 10 Orang
1,2,Usaha Nyiru Dg Kanang,111pengusaha@capstone.com,,89787890880,Aktif,SULAWESI SELATAN,KAB. GOWA,Industri Pengolahan,SULAWESI SELATAN,> 8 Juta,< 10 Orang
2,3,Ira Furniture,112pengusaha@capstone.com,,89787890881,Aktif,DKI JAKARTA,KOTA ADM. JAKARTA TIMUR,Industri Pengolahan,DKI JAKARTA,> 8 Juta,< 10 Orang
3,4,Usaha Tahu Suparni,113pengusaha@capstone.com,,89787890882,Aktif,JAWA TIMUR,KAB. SIDOARJO,Industri Pengolahan,JAWA TIMUR,> 8 Juta,< 10 Orang
4,5,MARJAN BUEN,114pengusaha@capstone.com,,89787890883,Aktif,NUSA TENGGARA BARAT,KAB. SUMBAWA,Kegiatan Jasa Lainnya,NUSA TENGGARA BARAT,> 8 Juta,< 10 Orang
...,...,...,...,...,...,...,...,...,...,...,...,...
550,551,MEUBEL GARUDA,660pengusaha@capstone.com,,89787891429,Aktif,KALIMANTAN TENGAH,KAB. KOTAWARINGIN BARAT,Industri Pengolahan,KALIMANTAN TENGAH,> 8 Juta,< 10 Orang
551,552,Usaha Kue Frederika,661pengusaha@capstone.com,,89787891430,Aktif,MALUKU,KOTA AMBON,Industri Pengolahan,MALUKU,> 8 Juta,< 10 Orang
552,553,Dandang Royana,662pengusaha@capstone.com,,89787891431,Aktif,SUMATERA SELATAN,KOTA PRABUMULIH,Industri Pengolahan,SUMATERA SELATAN,4 - 8 Juta,< 10 Orang
553,554,Isnaniah,663pengusaha@capstone.com,,89787891432,Aktif,KALIMANTAN SELATAN,KAB. BANJAR,Industri Pengolahan,KALIMANTAN SELATAN,< 4 Juta,< 10 Orang


In [92]:
# Applying the clean_data function to features
for feature in features:
    df_train[feature] = df_train[feature].apply(clean_data)

In [21]:
df_train.head(5)

Unnamed: 0,id,name,email,alamat,no_telp,status,provinsi,kabkota,kategori,lokasi,range_fund,range_employee
0,1,Usaha Daur Ulang kertas Semen,110pengusaha@capstone.com,,89787890879,Aktif,SUMATERA UTARA,KAB. DELI SERDANG,industripengolahan,sumaterautara,>8juta,<10orang
1,2,Usaha Nyiru Dg Kanang,111pengusaha@capstone.com,,89787890880,Aktif,SULAWESI SELATAN,KAB. GOWA,industripengolahan,sulawesiselatan,>8juta,<10orang
2,3,Ira Furniture,112pengusaha@capstone.com,,89787890881,Aktif,DKI JAKARTA,KOTA ADM. JAKARTA TIMUR,industripengolahan,dkijakarta,>8juta,<10orang
3,4,Usaha Tahu Suparni,113pengusaha@capstone.com,,89787890882,Aktif,JAWA TIMUR,KAB. SIDOARJO,industripengolahan,jawatimur,>8juta,<10orang
4,5,MARJAN BUEN,114pengusaha@capstone.com,,89787890883,Aktif,NUSA TENGGARA BARAT,KAB. SUMBAWA,kegiatanjasalainnya,nusatenggarabarat,>8juta,<10orang


In [93]:
df_train['soup'] = df_train.apply(create_soup, axis=1)
df_train['soup']=df_train['soup'].apply(str)

In [68]:
df_train.tail(3)

Unnamed: 0,id,name,email,alamat,no_telp,status,provinsi,kabkota,kategori,lokasi,range_fund,range_employee,soup
552,553,Dandang Royana,662pengusaha@capstone.com,,89787891431,Aktif,SUMATERA SELATAN,KOTA PRABUMULIH,industripengolahan,sumateraselatan,4-8juta,<10orang,industripengolahan 4-8juta <10orang sumaterase...
553,554,Isnaniah,663pengusaha@capstone.com,,89787891432,Aktif,KALIMANTAN SELATAN,KAB. BANJAR,industripengolahan,kalimantanselatan,<4juta,<10orang,industripengolahan <4juta <10orang kalimantans...
554,557,Yestika Dian Wulandari,wulan@capstone.com,,89787891435,Aktif,KALIMANTAN TIMUR,KOTA SAMARINDA,"kesenian,hiburandanrekreasi",kalimantantimur,>8juta,10-30orang,"kesenian,hiburandanrekreasi >8juta 10-30orang ..."


# Calculating the similarity

In [94]:
#calculate the similarity

count = CountVectorizer()
count_matrix = count.fit_transform(df_train['soup'])
cosine_sim2 = cosine_similarity(count_matrix, count_matrix)

In [95]:
#Getting the index
indices = pd.Series(df_train.index, index=df_train['name'])

In [62]:
indices

name
Usaha Daur Ulang kertas Semen      0
Usaha Nyiru Dg Kanang              1
Ira Furniture                      2
Usaha Tahu Suparni                 3
MARJAN BUEN                        4
                                ... 
MEUBEL GARUDA                    550
Usaha Kue Frederika              551
Dandang Royana                   552
Isnaniah                         553
Yestika Dian Wulandari           554
Length: 555, dtype: int64

#Getting the 10 most matched with user

In [96]:
def get_recommendations(user_name, cosine_sim):
    # Get the index of the user_name
    idx = indices[user_name]

    # Get the pairwsie similarity scores of all enterpreneurs by the investor
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the enterpreneurs based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most match items with user
    sim_scores = sim_scores[1:11]

    # Get the item indices
    umkm_id = [i[0] for i in sim_scores]

    # Return the top 10 most similar items
    return pengusaha_df.iloc[umkm_id, :]

In [97]:
result = get_recommendations(investor_df['name'].item(), cosine_sim2)

In [98]:
result

Unnamed: 0,id,name,email,alamat,no_telp,status,provinsi,kabkota,kategori,lokasi,range_fund,range_employee
109,110,Moerdijatno,219pengusaha@capstone.com,,89787890988,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
113,114,CV. Kreasindo Jaya,223pengusaha@capstone.com,,89787890992,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
149,150,HS Collection,259pengusaha@capstone.com,,89787891028,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
197,198,Anyar Pigura,307pengusaha@capstone.com,,89787891076,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
245,246,Hery Craft,355pengusaha@capstone.com,,89787891124,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
308,309,Peyek Kacang Marni,418pengusaha@capstone.com,,89787891187,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KAB. BANTUL,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
316,317,Farid Collection,426pengusaha@capstone.com,,89787891195,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
360,361,Qik!,470pengusaha@capstone.com,,89787891239,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
369,370,Redi Collection,479pengusaha@capstone.com,,89787891248,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang
408,409,Insiji Kreasi,518pengusaha@capstone.com,,89787891287,Aktif,DAERAH ISTIMEWA YOGYAKARTA,KOTA YOGYAKARTA,Industri Pengolahan,DAERAH ISTIMEWA YOGYAKARTA,< 4 Juta,< 10 Orang


# Export to JSON

In [99]:
json_file = result.to_json('test result.json',orient='records')