In [6]:
import pandas as pd

# Survey questions, asked in this order; they double as the CSV column headers
questions = [
    'How satisfied are you with our product?',
    'How likely are you to recommend our product to a friend?',
    'What do you like most about our product?',
    'What do you like least about our product?',
    'What can we do to improve our product?',
    'Did you have any technical difficulties using our product?',
    'How was your experience with our customer support team?',
    'How long have you been using our product?',
    'How frequently do you use our product?',
    'How easy was it to use our product?',
]

# Maps each respondent label to that respondent's answers, in question order
responses = {}

# Interactively prompt 10 respondents, numbered from 1
for i in range(1, 11):
    respondent = f'Individual {i}'
    # One prompt per question; the typed string is stored exactly as entered
    responses[respondent] = [input(f'{respondent}, {question}: ') for question in questions]

# One row per respondent, one column per question
df = pd.DataFrame.from_dict(responses, orient='index', columns=questions)

# Persist to CSV; the respondent labels form the first column, headed 'Individual'
df.to_csv('survey_data.csv', index_label='Individual')


Individual 1, How satisfied are you with our product?: 5
Individual 1, How likely are you to recommend our product to a friend?: 4
Individual 1, What do you like most about our product?: Design
Individual 1, What do you like least about our product?: price
Individual 1, What can we do to improve our product?: lower the price
Individual 1, Did you have any technical difficulties using our product?: NO
Individual 1, How was your experience with our customer support team?: 5
Individual 1, How long have you been using our product?: 2 yr
Individual 1, How frequently do you use our product?: 1
Individual 1, How easy was it to use our product?: 5
Individual 2, How satisfied are you with our product?: 5
Individual 2, How likely are you to recommend our product to a friend?: 5
Individual 2, What do you like most about our product?: quality
Individual 2, What do you like least about our product?: design
Individual 2, What can we do to improve our product?: improve design quality
Individual 2, Di

In [7]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt


def preprocess_text(text):
    """
    Normalize a piece of free text before vectorization.

    The text is lower-cased, tokenized with ``word_tokenize``, stripped of
    English stop words, lemmatized with ``WordNetLemmatizer`` and re-joined
    with single spaces.

    Parameters
    ----------
    text : str or None
        Raw text. ``None`` and float NaN (the value pandas uses for a blank
        cell) are treated as missing.

    Returns
    -------
    str
        The processed tokens joined by spaces, or an empty string when the
        input is missing.
    """
    # Blank survey answers arrive as None/NaN, which have no .lower(); map them
    # to empty text instead of raising AttributeError. `text != text` is true
    # only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    # Tokenize the lower-cased text
    tokens = word_tokenize(text.lower())
    # Remove stop words
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    # Lemmatize the tokens
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    # Join the tokens back into text
    return ' '.join(tokens)


def cluster_questions(df, num_clusters):
    """
    Cluster the rows of ``df`` by their text with K-means and plot the result.

    Each row's ``'text'`` value is preprocessed, turned into a TF-IDF vector
    and assigned to one of ``num_clusters`` K-means clusters. The vectors are
    then projected to two dimensions with PCA and drawn as a scatter plot,
    one colour per cluster.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'text'`` column. The frame is modified in place:
        ``'preprocessed_text'`` and ``'cluster'`` columns are added.
    num_clusters : int
        Number of K-means clusters to fit.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the two added columns.
    """
    # Preprocess the text data
    df['preprocessed_text'] = df['text'].apply(preprocess_text)
    # Generate the TF-IDF vectors
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform(df['preprocessed_text'])
    # Cluster with K-means; the fixed random_state keeps labels reproducible
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    kmeans.fit(vectors)
    df['cluster'] = kmeans.labels_
    # Perform PCA for visualization (PCA is given a dense array here)
    pca = PCA(n_components=2)
    vectors_2d = pca.fit_transform(vectors.toarray())
    # Plot the clusters
    plt.figure(figsize=(10, 10))
    colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
    for i in range(num_clusters):
        in_cluster = kmeans.labels_ == i
        # Cycle through the palette so more than len(colors) clusters does not
        # raise IndexError; colours repeat beyond that point.
        plt.scatter(vectors_2d[in_cluster, 0], vectors_2d[in_cluster, 1],
                    c=colors[i % len(colors)], label='Cluster {}'.format(i))
    plt.legend()
    plt.show()
    return df


def analyze_clusters(df):
    """
    Print a short feedback message for every cluster in ``df``.

    For each cluster label a fresh TF-IDF model is fitted on that cluster's
    ``'preprocessed_text'`` values, the per-term scores are summed over the
    cluster's rows, and the five highest-scoring terms are reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``'cluster'`` and ``'preprocessed_text'`` columns
        (as added by ``cluster_questions``).

    Returns
    -------
    None
        The feedback is written to standard output, one line per cluster.
    """
    vectorizer = TfidfVectorizer()
    # Iterate over the labels actually present instead of assuming they are
    # exactly 0..k-1; a missing label would otherwise select an empty cluster.
    cluster_ids = sorted(df['cluster'].unique())
    # Top terms per cluster: label -> list of (term, summed TF-IDF score)
    cluster_terms = {}
    for i in cluster_ids:
        cluster_df = df[df['cluster'] == i]
        vectors = vectorizer.fit_transform(cluster_df['preprocessed_text'])
        # get_feature_names() was removed in scikit-learn 1.2; prefer its
        # replacement and fall back only on versions that predate it.
        if hasattr(vectorizer, 'get_feature_names_out'):
            terms = vectorizer.get_feature_names_out()
        else:
            terms = vectorizer.get_feature_names()
        # Column sums give one aggregate score per term
        scores = vectors.sum(axis=0).tolist()[0]
        term_scores = sorted(zip(terms, scores), key=lambda x: x[1], reverse=True)[:5]
        cluster_terms[i] = term_scores
    # Print the feedback
    for i in cluster_ids:
        top_terms = [term for term, _ in cluster_terms[i]]
        # NOTE(review): the positive wording is tied to label 0 only; the code
        # does not establish that cluster 0 is the "good" one - confirm intent.
        if i == 0:
            feedback = 'Overall, things seem to be going well. '
        else:
            feedback = 'There seem to be some issues in this area. '
        feedback += 'The most frequent words in this area are: {}. '.format(', '.join(top_terms))
        feedback += 'Some suggestions for improvement include: '
        feedback += ', '.join('Increase the focus on "{}"'.format(term) for term in top_terms)
        print('Cluster {}: {}'.format(i, feedback))

# Display the survey DataFrame (a bare last expression renders as cell output).
# NOTE(review): nothing is loaded here; `df` must already exist in the kernel,
# presumably from an earlier cell. TODO confirm the intended source and load it
# explicitly (e.g. from 'survey_data.csv') so the notebook runs on a fresh kernel.
df


Unnamed: 0,How satisfied are you with our product?,How likely are you to recommend our product to a friend?,What do you like most about our product?,What do you like least about our product?,What can we do to improve our product?,Did you have any technical difficulties using our product?,How was your experience with our customer support team?,How long have you been using our product?,How frequently do you use our product?,How easy was it to use our product?
Individual 1,5,4,Design,price,lower the price,NO,5,2 yr,1,5
Individual 2,5,5,quality,design,improve design quality,no,5,7,3,5
Individual 3,4,3,feature,,reduce the price,yes,3,0,0,3
Individual 4,1,2,design,quality,improve quality,yes,3,1,1,3
Individual 5,4,4,price,design,make more design,no,4,3,1,5
Individual 6,5,yes,quality,feature,add new feature,no,5,4,1,5
Individual 7,1,1,,,reduce price,yes,1,1,1,1
Individual 8,5,5,design,quality,make good quality,no,5,3,2,5
Individual 9,3,3,feature,quality,good quality,no,3,2,1,4
Individual 10,5,5,price,features,add new features,no,5,4,1,5


In [None]:
question,answer_1,answer_2,answer_3,answer_4,answer_5,answer_6,answer_7,answer_8,answer_9,answer_10
How satisfied are you with our product?,5,4,3,5,2,1,4,5,3,4
How likely are you to recommend our product to others?,4,3,2,5,1,1,4,5,2,3
How easy was it to use our product?,4,3,2,5,1,1,4,5,2,3
How did you hear about our product?,TV ad,Online search,Social media,Word of mouth,TV ad,Social media,Online search,Word of mouth,Online search,Social media
Did our product meet your expectations?,Yes,Yes,No,Yes,No,No,Yes,Yes,No,Yes
What did you like most about our product?,Design,Features,Price,Quality,Design,Quality,Features,Design,Quality,Price
What did you like least about our product?,Ease of use,Price,Design,Features,Quality,Features,Quality,Design,Price,Ease of use
How can we improve our product?,Add more features,Lower the price,Improve the design,Improve the quality,Add more features,Improve the quality,Improve the design,Add more features,Improve the quality,Lower the price
Did you have any issues with our product?,No,Yes,No,Yes,No,Yes,No,Yes,No,No
How satisfied are you with our customer service?,5,4,3,5,2,1,4,5,3,4