-
Notifications
You must be signed in to change notification settings - Fork 0
/
curve_fit.py
110 lines (75 loc) · 4.02 KB
/
curve_fit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# ECOR 1042 Lab 6 - Individual submission for curve_fit function
# Module metadata: the author of this submission and the lab team identifier.
__author__ = "Elyssa Grant"
__team__ = "T-83"
#==========================================#
# Place your curve_fit function after this line
def curve_fit(dict_list: list[dict], compare: str, poly: int) -> str:
    """
    Return the equation of a polynomial fitted to the average G_Avg as a
    function of the attribute named by compare.

    The dictionaries in dict_list are grouped by their value for the key
    compare, and the "G_Avg" values of each group are averaged, giving one
    (x, y) point per distinct attribute value. numpy.polyfit is then used to
    fit a polynomial through those points. The degree of the fit is poly when
    there are more than poly points (least-squares regression); otherwise it
    is one less than the number of points (interpolation).

    The returned string lists the terms from the highest power down to the
    constant, joined by "+" with no spaces, in the form "ax^2+bx+c". Each
    coefficient is converted with str(), so it carries full floating-point
    precision and a negative coefficient shows up as "+-...". When the fit
    has degree 0 (a single distinct attribute value, or poly equal to 0) the
    string contains only the constant term.

    Preconditions: every dictionary contains the keys "G_Avg" and compare,
    and both hold an integer or float value.

    Raises ValueError if dict_list is empty or poly is negative.

    Examples (coefficients are shown rounded; the real strings contain the
    unrounded floating-point values):
    curve_fit([{"G_Avg": 4, "Health": 4}, {"G_Avg": 8, "Health": 3},
               {"G_Avg": 6.7, "Health": 4}], "Health", 1)
        is approximately '-2.65x+15.95'
    curve_fit([{"StudyTime": 1.0, "G_Avg": 3}, {"StudyTime": 2.0, "G_Avg": 5},
               {"StudyTime": 3.0, "G_Avg": 8}, {"StudyTime": 4.0, "G_Avg": 9}],
              "StudyTime", 4)
        is approximately '-0.5x^3+3.5x^2+-5.0x+5.0'
    """
    import numpy as np

    if not dict_list:
        raise ValueError("dict_list must contain at least one dictionary")
    if poly < 0:
        raise ValueError("poly must be a non-negative integer")

    # Group the G_Avg values by the actual attribute value. Grouping on the
    # value itself (rather than scanning an integer range) keeps fractional
    # attribute values such as 2.5 instead of silently discarding them.
    grades_by_value = {}
    for entry in dict_list:
        grades_by_value.setdefault(entry[compare], []).append(entry["G_Avg"])

    # One point per distinct attribute value, in ascending x order.
    x = sorted(grades_by_value)
    y = [sum(grades_by_value[value]) / len(grades_by_value[value])
         for value in x]

    # Regression when there are more points than the requested degree,
    # otherwise interpolation through all of the points.
    degree = min(poly, len(x) - 1)
    coeffs = np.polyfit(x, y, degree)

    # Build the terms from the highest power down to the constant.
    terms = []
    for index, coeff in enumerate(coeffs):
        power = degree - index
        if power > 1:
            terms.append(str(coeff) + "x^" + str(power))
        elif power == 1:
            terms.append(str(coeff) + "x")
        else:
            terms.append(str(coeff))
    return "+".join(terms)