Skip to content

Commit

Permalink
Hotfix: Ensure multiple Pods don't get created for a GameServer
Browse files Browse the repository at this point in the history
Since the cache is eventually consistent, there was a race condition
in which a GameServer could create more than one Pod backing it,
as the first one isn't found in the cache yet.

This sync's the cache before checking this value, so the cache
is up to date.
  • Loading branch information
markmandel committed Aug 28, 2018
1 parent e7e7274 commit eb26a9d
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 3 deletions.
15 changes: 13 additions & 2 deletions pkg/gameservers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,15 @@ type Controller struct {
crdGetter v1beta1.CustomResourceDefinitionInterface
podGetter typedcorev1.PodsGetter
podLister corelisterv1.PodLister
podSynced cache.InformerSynced
gameServerGetter getterv1alpha1.GameServersGetter
gameServerLister listerv1alpha1.GameServerLister
gameServerSynced cache.InformerSynced
nodeLister corelisterv1.NodeLister
portAllocator *PortAllocator
healthController *HealthController
workerqueue *workerqueue.WorkerQueue
stop <-chan struct{}
recorder record.EventRecorder
}

Expand All @@ -84,6 +86,7 @@ func NewController(
agonesClient versioned.Interface,
agonesInformerFactory externalversions.SharedInformerFactory) *Controller {

pods := kubeInformerFactory.Core().V1().Pods()
gameServers := agonesInformerFactory.Stable().V1alpha1().GameServers()
gsInformer := gameServers.Informer()

Expand All @@ -92,7 +95,8 @@ func NewController(
alwaysPullSidecarImage: alwaysPullSidecarImage,
crdGetter: extClient.ApiextensionsV1beta1().CustomResourceDefinitions(),
podGetter: kubeClient.CoreV1(),
podLister: kubeInformerFactory.Core().V1().Pods().Lister(),
podLister: pods.Lister(),
podSynced: pods.Informer().HasSynced,
gameServerGetter: agonesClient.StableV1alpha1(),
gameServerLister: gameServers.Lister(),
gameServerSynced: gsInformer.HasSynced,
Expand Down Expand Up @@ -127,7 +131,7 @@ func NewController(
})

// track pod deletions, for when GameServers are deleted
kubeInformerFactory.Core().V1().Pods().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
pods.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
DeleteFunc: func(obj interface{}) {
// Could be a DeletedFinalStateUnknown, in which case, just ignore it
pod, ok := obj.(*corev1.Pod)
Expand Down Expand Up @@ -221,6 +225,8 @@ func (c *Controller) creationValidationHandler(review admv1beta1.AdmissionReview
// Run the GameServer controller. Will block until stop is closed.
// Runs threadiness number workers to process the rate limited queue
func (c *Controller) Run(workers int, stop <-chan struct{}) error {
c.stop = stop

err := crd.WaitForEstablishedCRD(c.crdGetter, "gameservers.stable.agones.dev", c.logger)
if err != nil {
return err
Expand Down Expand Up @@ -362,6 +368,11 @@ func (c *Controller) syncGameServerCreatingState(gs *v1alpha1.GameServer) (*v1al

c.logger.WithField("gs", gs).Info("Syncing Create State")

// Wait for pod cache sync, so that we don't end up with multiple pods for a GameServer
if !(cache.WaitForCacheSync(c.stop, c.podSynced)) {
return nil, errors.New("could not sync pod cache state")
}

// Maybe something went wrong, and the pod was created, but the state was never moved to Starting, so let's check
ret, err := c.listGameServerPods(gs)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/gameservers/localsdk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func TestLocalSDKServerSetLabel(t *testing.T) {
},
}

for k,v := range fixtures {
for k, v := range fixtures {
t.Run(k, func(t *testing.T) {
ctx := context.Background()
e := &sdk.Empty{}
Expand Down

0 comments on commit eb26a9d

Please sign in to comment.